diff --git a/checkpoint-1008000/config.json b/checkpoint-1008000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1008000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1008000/generation_config.json b/checkpoint-1008000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1008000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1020000/config.json b/checkpoint-1020000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1020000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1020000/generation_config.json b/checkpoint-1020000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1020000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1032000/config.json b/checkpoint-1032000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1032000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1032000/generation_config.json b/checkpoint-1032000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1032000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1044000/config.json b/checkpoint-1044000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1044000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1044000/generation_config.json b/checkpoint-1044000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1044000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1056000/config.json b/checkpoint-1056000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1056000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1056000/generation_config.json b/checkpoint-1056000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1056000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1068000/config.json b/checkpoint-1068000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1068000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1068000/generation_config.json b/checkpoint-1068000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1068000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1080000/config.json b/checkpoint-1080000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1080000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1080000/generation_config.json b/checkpoint-1080000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1080000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1092000/config.json b/checkpoint-1092000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1092000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1092000/generation_config.json b/checkpoint-1092000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1092000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1104000/config.json b/checkpoint-1104000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1104000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1104000/generation_config.json b/checkpoint-1104000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1104000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1116000/config.json b/checkpoint-1116000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1116000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1116000/generation_config.json b/checkpoint-1116000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1116000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1128000/config.json b/checkpoint-1128000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1128000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1128000/generation_config.json b/checkpoint-1128000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1128000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1140000/config.json b/checkpoint-1140000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1140000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1140000/generation_config.json b/checkpoint-1140000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1140000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1152000/config.json b/checkpoint-1152000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1152000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1152000/generation_config.json b/checkpoint-1152000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1152000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1164000/config.json b/checkpoint-1164000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1164000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1164000/generation_config.json b/checkpoint-1164000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1164000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1176000/config.json b/checkpoint-1176000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1176000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1176000/generation_config.json b/checkpoint-1176000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1176000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1188000/config.json b/checkpoint-1188000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1188000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1188000/generation_config.json b/checkpoint-1188000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1188000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1200000/config.json b/checkpoint-1200000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1200000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1200000/generation_config.json b/checkpoint-1200000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1200000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1212000/config.json b/checkpoint-1212000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1212000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1212000/generation_config.json b/checkpoint-1212000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1212000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1212000/scheduler.pt b/checkpoint-1212000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa9ebf64004525d0626134e24efd9e1c9ca42b6e --- /dev/null +++ b/checkpoint-1212000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7684b4b0b982d908c7850582bc9a24836ddbd4f22e61ff172a78c8e18cb72f2e +size 1465 diff --git a/checkpoint-1224000/config.json b/checkpoint-1224000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1224000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1224000/generation_config.json b/checkpoint-1224000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1224000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1236000/config.json b/checkpoint-1236000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-1236000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-1236000/generation_config.json b/checkpoint-1236000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1236000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1248000/generation_config.json b/checkpoint-1248000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-1248000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-1248000/rng_state_3.pth b/checkpoint-1248000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..84660ff39da893f3edf58f4a54971ca424b5ae70 --- /dev/null +++ b/checkpoint-1248000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9967425ebcaee80d9b518fa0244d52f739b1b983d87cda71d5fede0c073e9d3b +size 16389 diff --git a/checkpoint-1260000/rng_state_0.pth b/checkpoint-1260000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4b0bea5e7a82c1886ccb79d111257405625494a6 --- /dev/null +++ b/checkpoint-1260000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f92cf63e0989759370d24108b469c492c12202403f036015307ce49f12cedc +size 16389 diff --git a/checkpoint-1260000/rng_state_3.pth b/checkpoint-1260000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..84660ff39da893f3edf58f4a54971ca424b5ae70 --- /dev/null +++ b/checkpoint-1260000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9967425ebcaee80d9b518fa0244d52f739b1b983d87cda71d5fede0c073e9d3b +size 16389 diff --git a/checkpoint-1260000/rng_state_7.pth b/checkpoint-1260000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad30a51583bfc3be86c6b6aa3bc2e815019b3e77 --- /dev/null +++ b/checkpoint-1260000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a376268a55d6ee10c371c06aa952334c4c6a1af9ea2d71b1951a57367a0c6722 +size 16389 diff --git a/checkpoint-1268031/rng_state_0.pth b/checkpoint-1268031/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4b0bea5e7a82c1886ccb79d111257405625494a6 --- /dev/null +++ b/checkpoint-1268031/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f92cf63e0989759370d24108b469c492c12202403f036015307ce49f12cedc +size 16389 diff --git a/checkpoint-1268031/rng_state_3.pth b/checkpoint-1268031/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..84660ff39da893f3edf58f4a54971ca424b5ae70 --- /dev/null +++ b/checkpoint-1268031/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9967425ebcaee80d9b518fa0244d52f739b1b983d87cda71d5fede0c073e9d3b +size 16389 diff --git a/checkpoint-1268031/rng_state_4.pth b/checkpoint-1268031/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd63d0f91d990383a77a110da21707c2e4161995 --- /dev/null +++ b/checkpoint-1268031/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469900fd39c667ffbd49c3c407c0ba317a1e9f5f9339a99b5d38423b7d0ce6d4 +size 16389 diff --git a/checkpoint-1268031/scheduler.pt b/checkpoint-1268031/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..da63e00362c7aa9fa481ca38d88734a5991ea774 --- /dev/null +++ b/checkpoint-1268031/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d77573338550af5cf759967927a0bf91260c39a68e2676a06651fb012b32e67 +size 1465 diff --git a/checkpoint-696000/config.json b/checkpoint-696000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-696000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-696000/generation_config.json b/checkpoint-696000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-696000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-708000/generation_config.json b/checkpoint-708000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-708000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-720000/config.json b/checkpoint-720000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-720000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-732000/config.json b/checkpoint-732000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-732000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-732000/generation_config.json b/checkpoint-732000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-732000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-744000/config.json b/checkpoint-744000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-744000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-744000/generation_config.json b/checkpoint-744000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-744000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-756000/config.json b/checkpoint-756000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-756000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-756000/generation_config.json b/checkpoint-756000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-756000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-768000/config.json b/checkpoint-768000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-768000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-768000/generation_config.json b/checkpoint-768000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-768000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-780000/config.json b/checkpoint-780000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-780000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-780000/generation_config.json b/checkpoint-780000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-780000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-792000/config.json b/checkpoint-792000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-792000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-792000/generation_config.json b/checkpoint-792000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-792000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-804000/config.json b/checkpoint-804000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-804000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-804000/generation_config.json b/checkpoint-804000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-804000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-816000/config.json b/checkpoint-816000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-816000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-816000/generation_config.json b/checkpoint-816000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-816000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-828000/config.json b/checkpoint-828000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-828000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-828000/generation_config.json b/checkpoint-828000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-828000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-840000/config.json b/checkpoint-840000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-840000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-840000/generation_config.json b/checkpoint-840000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-840000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-852000/config.json b/checkpoint-852000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-852000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-852000/generation_config.json b/checkpoint-852000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-852000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-864000/config.json b/checkpoint-864000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-864000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-864000/generation_config.json b/checkpoint-864000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-864000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-876000/config.json b/checkpoint-876000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-876000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-876000/generation_config.json b/checkpoint-876000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-876000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-888000/config.json b/checkpoint-888000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-888000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-888000/generation_config.json b/checkpoint-888000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-888000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-900000/config.json b/checkpoint-900000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-900000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-900000/generation_config.json b/checkpoint-900000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-900000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-912000/config.json b/checkpoint-912000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-912000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-912000/generation_config.json b/checkpoint-912000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-912000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-924000/config.json b/checkpoint-924000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-924000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-924000/generation_config.json b/checkpoint-924000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-924000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-936000/config.json b/checkpoint-936000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-936000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-936000/generation_config.json b/checkpoint-936000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-936000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-948000/config.json b/checkpoint-948000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-948000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-948000/generation_config.json b/checkpoint-948000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-948000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-960000/config.json b/checkpoint-960000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-960000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-960000/generation_config.json b/checkpoint-960000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-960000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-972000/config.json b/checkpoint-972000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-972000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-972000/generation_config.json b/checkpoint-972000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-972000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-984000/config.json b/checkpoint-984000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-984000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-984000/generation_config.json b/checkpoint-984000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-984000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +} diff --git a/checkpoint-996000/config.json b/checkpoint-996000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db0da389ddd955a2a978ea931734d0282b929745 --- /dev/null +++ b/checkpoint-996000/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 93083 +} diff --git a/checkpoint-996000/generation_config.json b/checkpoint-996000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2de3266764390423945f28f14b42759f7842439f --- /dev/null +++ b/checkpoint-996000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0" +}