From 22a05e8c590632f6eb4feee91f0dc6a0386892d3 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Thu, 7 Sep 2023 20:00:35 +0200 Subject: [PATCH] add pad_token and sep_token, if not yet available and necessary --- .../re_text_classification_with_indices.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/pie_models/taskmodules/re_text_classification_with_indices.py b/src/pie_models/taskmodules/re_text_classification_with_indices.py index 286031431..5ed9b1597 100644 --- a/src/pie_models/taskmodules/re_text_classification_with_indices.py +++ b/src/pie_models/taskmodules/re_text_classification_with_indices.py @@ -272,6 +272,19 @@ def _post_prepare(self): self.id_to_label = {v: k for k, v in self.label_to_id.items()} + if self.tokenizer.pad_token is None: + logger.warning( + "The tokenizer has no pad token, but this is required to pad the batches. We add a pad token to " + "the tokenizer." + ) + self.tokenizer.add_special_tokens({"pad_token": "[PAD]"}) + if self.append_markers and self.tokenizer.sep_token is None: + logger.warning( + "The tokenizer has no sep token, but this is required if append_markers=True. We add a sep token " + "to the tokenizer." + ) + self.tokenizer.add_special_tokens({"sep_token": "[SEP]"}) + def _create_relation_candidates( self, document: Document,