Introduction
这是一个基于bert-base-chinese微调的情感分类的模型,准确率大概为94.6%,数据集为sentiment-classification,一个学习项目,旨在学习NLP的基础知识以及了解hugging face生态。
代码请查看Sentiment-Classification,具体的细节可以查看我的博客
Usage
from transformers.models.auto.tokenization_auto import AutoTokenizer
from transformers.models.auto.modeling_auto import AutoModelForSequenceClassification
from transformers.training_args import TrainingArguments
from transformers.data.data_collator import DataCollatorWithPadding
from transformers.trainer import Trainer
from transformers.trainer_utils import EvalPrediction
from datasets import load_dataset, Features, Value, ClassLabel
# Load the fine-tuned checkpoint as a binary (2-label) sequence classifier.
model = AutoModelForSequenceClassification.from_pretrained("left0ver/bert-base-chinese-finetune-sentiment-classification",num_labels=2)
# The tokenizer comes from the base model; the fine-tuned repo reuses its vocabulary.
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese",return_tensors="pt")
# Download the sentiment-classification dataset used for fine-tuning.
dataset = load_dataset("left0ver/sentiment-classification")
# Tokenize every split: truncate to BERT's 512-token limit and drop the raw
# "text" column so batches contain only model inputs (+ labels).
tokenized_dataset = dataset.map(
lambda examples: tokenizer(examples["text"],truncation=True,max_length=512),
batched=True,
remove_columns=["text"],
)
# Pad dynamically to the longest sequence in each batch rather than to max_length.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer,padding=True)
# Full fine-tuning configuration. In this usage example only evaluation is
# run, but the training hyperparameters are kept for reference.
training_args = TrainingArguments(
output_dir="./char_based_bert_finetune",
num_train_epochs =10,
eval_strategy = "epoch",
per_device_train_batch_size =64,
per_device_eval_batch_size=32,
gradient_accumulation_steps =1,
learning_rate = 1e-6,
lr_scheduler_type = "cosine",
logging_strategy= "steps",
logging_steps = 20,
save_strategy = "epoch",
save_total_limit = 4,
seed = 42,
data_seed = 42,
load_best_model_at_end=True,
# Name of the dataset column(s) holding the labels.
label_names=["labels"],
run_name="char_based_bert_finetune",
report_to="wandb",
# Best checkpoint is the one with the highest eval accuracy (computed by
# compute_metrics below, hence the "eval_" prefix).
metric_for_best_model="eval_accuracy",
greater_is_better=True,
optim="adamw_torch",
# eval_on_start=True, # just for test eval
)
def compute_metrics(eval_pred: "EvalPrediction") -> dict:
    """Compute accuracy for a Trainer evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``. Because
    ``preprocess_logits_for_metrics`` already reduced logits to class ids,
    ``predictions`` is directly comparable to ``labels`` element-wise.

    The annotation is a string so defining this function does not require
    ``EvalPrediction`` to be importable at runtime.

    Returns:
        dict with key ``'accuracy'`` mapped to a plain Python float —
        numpy scalars are cast so the value serializes cleanly to
        loggers (wandb / JSON).
    """
    predictions, labels = eval_pred
    # .mean() of a boolean array yields a numpy scalar; cast to float.
    accuracy = float((predictions == labels).mean())
    return {
        'accuracy': accuracy,
    }
def preprocess_logits_for_metrics(logits, labels):
    """Reduce raw logits to predicted class ids before metrics are computed.

    Keeping only the argmax over the class dimension prevents the Trainer
    from accumulating full logit tensors across the eval set. ``labels`` is
    part of the expected callback signature but is intentionally unused.
    """
    return logits.argmax(axis=1)
# Assemble the Trainer purely for evaluation; no training is started here.
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized_dataset["train"],
eval_dataset=tokenized_dataset["validation"],
compute_metrics=compute_metrics,
# `processing_class` is the current name for what older Trainer versions
# accepted as the `tokenizer` argument.
processing_class = tokenizer,
data_collator=data_collator,
# Shrink logits to class ids batch-by-batch to keep eval memory low.
preprocess_logits_for_metrics =preprocess_logits_for_metrics,
)
# Run one evaluation pass over the validation split and report eval_accuracy.
trainer.evaluate()
- Downloads last month
- 16
Model tree for left0ver/bert-base-chinese-finetune-sentiment-classification
Base model
google-bert/bert-base-chinese