from datasets import load_metric  # NOTE(review): load_metric is deprecated (removed in datasets>=3.0); replacement is `evaluate.load("seqeval")` — confirm pinned version before changing
# Token-classification head on top of DistilBERT; output dimension matches the label set.
model = AutoModelForTokenClassification.from_pretrained("distilbert-base-uncased", num_labels=len(label_list))
# NOTE(review): this TrainingArguments call appears truncated in this chunk —
# the closing parenthesis and any remaining arguments are not visible here.
args = TrainingArguments(
evaluation_strategy = "epoch",  # run evaluation at the end of every epoch
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
# Collator that dynamically pads tokenized inputs (and their label ids) to a
# common length within each batch.
data_collator = DataCollatorForTokenClassification(tokenizer)
# seqeval scores entity-level precision/recall/F1 for BIO-style tag sequences;
# its result dict supplies the "overall_*" keys read by the metrics function below.
metric = load_metric("seqeval")
"""Compute some quick metrics for model performance"""
predictions = np.argmax(predictions, axis=2)
# Skip -100 (the padding token for start and end of sequences)
true_predictions = [[label_list[p] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
true_labels = [[label_list[l] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
results = metric.compute(predictions=true_predictions, references=true_labels)
return {"precision": results["overall_precision"], "recall": results["overall_recall"], "f1": results["overall_f1"], "accuracy": results["overall_accuracy"]}
# NOTE(review): these are keyword arguments to a call whose opening line
# (presumably `trainer = Trainer(model, args, ...`) is not visible in this chunk.
train_dataset=train_tokenized_dataset,
eval_dataset=test_tokenized_dataset,
data_collator=data_collator,
# Hook so each evaluation pass reports the seqeval precision/recall/F1/accuracy.
compute_metrics=compute_metrics