ArneBinder · ArneBinder · Apr 27, 2025 · Apr 27, 2025 · Apr 27, 2025 · Apr 27, 2025
diff --git a/README.md b/README.md
@@ -1,40 +1,23 @@
 # pie-modules
 
-<a href="https://pytorch.org/get-started/locally/"><img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-ee4c2c?logo=pytorch&logoColor=white"></a>
-<a href="https://pytorchlightning.ai/"><img alt="Lightning" src="https://img.shields.io/badge/-Lightning-792ee5?logo=pytorchlightning&logoColor=white"></a>
-<a href="https://github.com/ChristophAlt/pytorch-ie"><img alt="PyTorch-IE" src="https://img.shields.io/badge/-PyTorch--IE-017F2F?style=flat&logo=github&labelColor=gray"></a><br>
+<a href="https://github.com/ArneBinder/pie-core"><img alt="Python-IE" src="https://img.shields.io/badge/-Python--IE-017F2F?style=flat&logo=github&labelColor=gray"></a><br>
 
 [![PyPI](https://img.shields.io/pypi/v/pie-modules.svg)][pypi status]
 [![Tests](https://github.com/arnebinder/pie-modules/workflows/Tests/badge.svg)][tests]
 [![Codecov](https://codecov.io/gh/arnebinder/pie-modules/branch/main/graph/badge.svg)][codecov]
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
 [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
 
-Model-, taskmodule-, and metric-implementations as well as document processing utilities for [PyTorch-IE](https://github.com/ChristophAlt/pytorch-ie).
+Annotation, document, and metric implementations as well as utilities for [Python-IE](https://github.com/ArneBinder/pie-core).
 
-Available models:
+Available annotation types: see [here](src/pie_modules/annotations.py).
 
-- [SimpleSequenceClassificationModel](src/pie_modules/models/simple_sequence_classification.py)
-- [SequenceClassificationModelWithPooler](src/pie_modules/models/sequence_classification_with_pooler.py)
-- [SequencePairSimilarityModelWithPooler](src/pie_modules/models/sequence_classification_with_pooler.py)
-- [SimpleTokenClassificationModel](src/pie_modules/models/simple_token_classification.py)
-- [TokenClassificationModelWithSeq2SeqEncoderAndCrf](src/pie_modules/models/token_classification_with_seq2seq_encoder_and_crf.py)
-- [SimpleExtractiveQuestionAnsweringModel](src/pie_modules/models/simple_extractive_question_answering.py)
-- [SimpleGenerativeModel](src/pie_modules/models/simple_generative.py)
-- [SpanTupleClassificationModel](src/pie_modules/models/span_tuple_classification.py)
-
-Available taskmodules:
-
-- [RETextClassificationWithIndicesTaskModule](src/pie_modules/taskmodules/re_text_classification_with_indices.py)
-- [CrossTextBinaryCorefTaskModule](src/pie_modules/taskmodules/cross_text_binary_coref.py)
-- [LabeledSpanExtractionByTokenClassificationTaskModule](src/pie_modules/taskmodules/labeled_span_extraction_by_token_classification.py)
-- [ExtractiveQuestionAnsweringTaskModule](src/pie_modules/taskmodules/extractive_question_answering.py)
-- [TextToTextTaskModule](src/pie_modules/taskmodules/text_to_text.py)
-- [PointerNetworkTaskModuleForEnd2EndRE](src/pie_modules/taskmodules/pointer_network_for_end2end_re.py)
-- [RESpanPairClassificationTaskModule](src/pie_modules/taskmodules/re_span_pair_classification.py)
+Available document types: see [here](src/pie_modules/documents.py).
 
 Available metrics:
 
+- [F1Metric](src/pie_modules/metrics/f1.py)
+- [ConfusionMatrix](src/pie_modules/metrics/confusion_matrix.py)
 - [SpanLengthCollector](src/pie_modules/metrics/span_length_collector.py)
 - [RelationArgumentDistanceCollector](src/pie_modules/metrics/relation_argument_distance_collector.py)
 - [SpanCoverageCollector](src/pie_modules/metrics/span_coverage_collector.py)
@@ -48,7 +31,7 @@ Document processing utilities:
 - [RelationArgumentSorter](src/pie_modules/document/processing/relation_argument_sorter.py)
 - [SentenceSplitter](src/pie_modules/document/processing/sentence_splitter.py)
 - [TextSpanTrimmer](src/pie_modules/document/processing/text_span_trimmer.py)
-- [tokenize_document](src/pie_modules/document/processing/tokenization.py)
+- [tokenization utils](src/pie_modules/document/processing/tokenization.py), e.g., `text_based_document_to_token_based` and `token_based_document_to_text_based`
 
 ## Setup
 

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "pie-modules"
 version = "0.15.9"
-description = "Model and Taskmodule implementations for PyTorch-IE"
+description = "Utility modules for Python-IE"
 authors = ["Arne Binder <arne.binder@dfki.de>"]
 readme = "README.md"
 homepage = "https://github.com/arnebinder/pie-modules"
@@ -24,22 +24,15 @@ classifiers = [
 
 [tool.poetry.dependencies]
 python = "^3.9"
-# TODO: remove and use pie-core instead
-pytorch-ie = ">=0.31.9,<0.32.0"
-pytorch-lightning = "^2.1.0"
-torchmetrics = "^1"
-# >=4.35 because of BartModelWithDecoderPositionIds, <4.37 because of generation config
-# created from model config in BartAsPointerNetwork
-transformers = ">=4.35.0,<4.37.0"
+pie-core = ">=0.2.0,<0.3.0"
+# for show_as_markdown in metrics
+pandas = ">=2.0.3,<3.0.0"
 
 [tool.poetry.group.dev.dependencies]
-torch = {version = "^2.1.0+cpu", source = "pytorch"}
 pytest = "^7.4.2"
 pytest-cov = "^4.1.0"
 pre-commit = "^3.4.0"
 tabulate = "^0.9"
-# for TokenClassificationModelWithSeq2SeqEncoderAndCrf
-pytorch-crf = ">=0.7.2"
 # for rouge metric (tests only) and for NltkSentenceSplitter
 nltk = "^3.8.1"
 # for NltkSentenceSplitter
@@ -60,7 +53,6 @@ name = "pre-release"
 url = "https://test.pypi.org/simple/"
 priority = "explicit"
 
-
 [tool.pytest.ini_options]
 addopts = [
   "--color=yes",

diff --git a/src/pie_modules/annotations.py b/src/pie_modules/annotations.py
@@ -1,22 +1,192 @@
 import dataclasses
-from typing import Optional
+from dataclasses import dataclass, field
+from typing import Any, Optional, Tuple
 
 from pie_core import Annotation
 
-# re-export all annotations from pytorch_ie to have a single entry point
-from pytorch_ie.annotations import (
-    BinaryRelation,
-    Label,
-    LabeledMultiSpan,
-    LabeledSpan,
-    MultiLabel,
-    MultiLabeledBinaryRelation,
-    MultiLabeledSpan,
-    MultiSpan,
-    NaryRelation,
-    Span,
-    _post_init_single_label,
-)
+
+def _post_init_single_label(self):
+    """Validate the ``label`` and ``score`` attributes of a single-label annotation.
+
+    Raises:
+        ValueError: if ``self.label`` is not a ``str`` or ``self.score`` is not a ``float``.
+    """
+    # (attribute name, required type, error message); checked in this order
+    requirements = (
+        ("label", str, "label must be a single string."),
+        ("score", float, "score must be a single float."),
+    )
+    for attribute, required_type, message in requirements:
+        if not isinstance(getattr(self, attribute), required_type):
+            raise ValueError(message)
+
+
+def _post_init_multi_label(self):
+    """Normalize and validate ``label`` and ``score`` of a multi-label annotation.
+
+    ``label`` and ``score`` are converted to tuples. A missing score (``None``) is
+    replaced by ``1.0`` for every label. ``object.__setattr__`` is used for the
+    assignments so that this also works on frozen dataclass instances.
+
+    Raises:
+        ValueError: if the number of labels differs from the number of scores.
+    """
+    # Normalize the labels first: the default scores are derived from the number of
+    # labels, which is only available for any iterable once it has been materialized.
+    # NOTE(review): a plain str passed as label is split into its characters here.
+    if not isinstance(self.label, tuple):
+        object.__setattr__(self, "label", tuple(self.label))
+
+    if self.score is None:
+        object.__setattr__(self, "score", (1.0,) * len(self.label))
+    elif not isinstance(self.score, tuple):
+        object.__setattr__(self, "score", tuple(self.score))
+
+    if len(self.label) != len(self.score):
+        raise ValueError(
+            f"Number of labels ({len(self.label)}) and scores ({len(self.score)}) must be equal."
+        )
+
+
+def _post_init_multi_span(self):
+    """Normalize ``slices`` to a tuple of tuples.
+
+    Lists are converted on both levels, i.e. a list of slices as well as a tuple that
+    still contains list slices. This keeps the value hashable. Values that are neither
+    a list nor a tuple are left untouched. ``object.__setattr__`` is used so that this
+    also works on frozen dataclass instances.
+    """
+    slices = self.slices
+    if not isinstance(slices, (list, tuple)):
+        return
+    # only re-assign if there is actually something to normalize
+    if isinstance(slices, list) or any(not isinstance(s, tuple) for s in slices):
+        object.__setattr__(self, "slices", tuple(tuple(s) for s in slices))
+
+
+def _post_init_arguments_and_roles(self):
+    """Check that ``arguments`` and ``roles`` have equal length and convert both to tuples.
+
+    ``object.__setattr__`` is used for the assignments so that this also works on
+    frozen dataclass instances.
+
+    Raises:
+        ValueError: if the number of arguments differs from the number of roles.
+    """
+    num_arguments = len(self.arguments)
+    num_roles = len(self.roles)
+    if num_arguments != num_roles:
+        raise ValueError(
+            f"Number of arguments ({num_arguments}) and roles ({num_roles}) must be equal"
+        )
+    for attribute in ("arguments", "roles"):
+        value = getattr(self, attribute)
+        if not isinstance(value, tuple):
+            object.__setattr__(self, attribute, tuple(value))
+
+
+@dataclass(eq=True, frozen=True)
+class Label(Annotation):
+    """An annotation that consists of a single label and a score."""
+
+    label: str
+    # declared with compare=False, i.e. the score is ignored when comparing annotations
+    score: float = field(default=1.0, compare=False)
+
+    def __post_init__(self) -> None:
+        """Validate the types of ``label`` and ``score``."""
+        _post_init_single_label(self)
+
+    def resolve(self) -> Any:
+        """Return the label."""
+        return self.label
+
+
+@dataclass(eq=True, frozen=True)
+class MultiLabel(Annotation):
+    """An annotation that consists of multiple labels with one score per label."""
+
+    label: Tuple[str, ...]
+    # declared with compare=False, i.e. the scores are ignored when comparing annotations;
+    # None is replaced by a score of 1.0 per label in __post_init__
+    score: Optional[Tuple[float, ...]] = field(default=None, compare=False)
+
+    def __post_init__(self) -> None:
+        """Convert ``label`` and ``score`` to tuples and check that their lengths match."""
+        _post_init_multi_label(self)
+
+    def resolve(self) -> Any:
+        """Return the tuple of labels."""
+        return self.label
+
+
+@dataclass(eq=True, frozen=True)
+class Span(Annotation):
+    """An annotation that selects ``target[start:end]`` of the target it is attached to."""
+
+    start: int
+    end: int
+
+    def __str__(self) -> str:
+        """Return the covered slice of the target as string if attached, else the default."""
+        if self.is_attached:
+            return str(self.target[self.start : self.end])
+        return super().__str__()
+
+    def resolve(self) -> Any:
+        """Return the covered slice of the target.
+
+        Raises:
+            ValueError: if the annotation is not attached to a target.
+        """
+        if not self.is_attached:
+            raise ValueError(f"{self} is not attached to a target.")
+        return self.target[self.start : self.end]
+
+
+@dataclass(eq=True, frozen=True)
+class LabeledSpan(Span):
+    """A span that additionally carries a single label and a score."""
+
+    label: str
+    # declared with compare=False, i.e. the score is ignored when comparing annotations
+    score: float = field(default=1.0, compare=False)
+
+    def __post_init__(self) -> None:
+        """Validate the types of ``label`` and ``score``."""
+        _post_init_single_label(self)
+
+    def resolve(self) -> Any:
+        """Return the pair of the label and the resolved span."""
+        resolved_span = super().resolve()
+        return self.label, resolved_span
+
+
+@dataclass(eq=True, frozen=True)
+class MultiLabeledSpan(Span):
+    """A span that additionally carries multiple labels with one score per label."""
+
+    label: Tuple[str, ...]
+    # declared with compare=False, i.e. the scores are ignored when comparing annotations;
+    # None is replaced by a score of 1.0 per label in __post_init__
+    score: Optional[Tuple[float, ...]] = field(default=None, compare=False)
+
+    def __post_init__(self) -> None:
+        """Convert ``label`` and ``score`` to tuples and check that their lengths match."""
+        _post_init_multi_label(self)
+
+    def resolve(self) -> Any:
+        """Return the pair of the labels and the resolved span."""
+        resolved_span = super().resolve()
+        return self.label, resolved_span
+
+
+@dataclass(eq=True, frozen=True)
+class MultiSpan(Annotation):
+    """An annotation that selects several ``(start, end)`` slices of its target."""
+
+    slices: Tuple[Tuple[int, int], ...]
+
+    def __post_init__(self) -> None:
+        """Normalize ``slices`` (see ``_post_init_multi_span``)."""
+        _post_init_multi_span(self)
+
+    def __str__(self) -> str:
+        """Return the covered target slices as string if attached, else the default."""
+        if self.is_attached:
+            covered = tuple(self.target[start:end] for start, end in self.slices)
+            return str(covered)
+        return super().__str__()
+
+    def resolve(self) -> Any:
+        """Return a tuple with the covered slice of the target for each entry in ``slices``.
+
+        Raises:
+            ValueError: if the annotation is not attached to a target.
+        """
+        if not self.is_attached:
+            raise ValueError(f"{self} is not attached to a target.")
+        return tuple(self.target[start:end] for start, end in self.slices)
+
+
+@dataclass(eq=True, frozen=True)
+class LabeledMultiSpan(MultiSpan):
+    """A multi-span that additionally carries a single label and a score."""
+
+    label: str
+    # declared with compare=False, i.e. the score is ignored when comparing annotations
+    score: float = field(default=1.0, compare=False)
+
+    def __post_init__(self) -> None:
+        """Run the ``MultiSpan`` post-init, then validate ``label`` and ``score``."""
+        super().__post_init__()
+        _post_init_single_label(self)
+
+    def resolve(self) -> Any:
+        """Return the pair of the label and the resolved multi-span."""
+        resolved_slices = super().resolve()
+        return self.label, resolved_slices
+
+
+@dataclass(eq=True, frozen=True)
+class BinaryRelation(Annotation):
+    """A labeled relation between a head and a tail annotation."""
+
+    head: Annotation
+    tail: Annotation
+    label: str
+    # declared with compare=False, i.e. the score is ignored when comparing annotations
+    score: float = field(default=1.0, compare=False)
+
+    def __post_init__(self) -> None:
+        """Validate the types of ``label`` and ``score``."""
+        _post_init_single_label(self)
+
+    def resolve(self) -> Any:
+        """Return ``(label, (resolved head, resolved tail))``."""
+        resolved_head = self.head.resolve()
+        resolved_tail = self.tail.resolve()
+        return self.label, (resolved_head, resolved_tail)
+
+
+@dataclass(eq=True, frozen=True)
+class MultiLabeledBinaryRelation(Annotation):
+    """A relation between a head and a tail annotation with multiple labels."""
+
+    head: Annotation
+    tail: Annotation
+    label: Tuple[str, ...]
+    # declared with compare=False, i.e. the scores are ignored when comparing annotations;
+    # None is replaced by a score of 1.0 per label in __post_init__
+    score: Optional[Tuple[float, ...]] = field(default=None, compare=False)
+
+    def __post_init__(self) -> None:
+        """Convert ``label`` and ``score`` to tuples and check that their lengths match."""
+        _post_init_multi_label(self)
+
+    def resolve(self) -> Any:
+        """Return ``(labels, (resolved head, resolved tail))``."""
+        resolved_head = self.head.resolve()
+        resolved_tail = self.tail.resolve()
+        return self.label, (resolved_head, resolved_tail)
+
+
+@dataclass(eq=True, frozen=True)
+class NaryRelation(Annotation):
+    """A labeled relation between any number of argument annotations, each with a role."""
+
+    arguments: Tuple[Annotation, ...]
+    # one role per argument, in the same order as ``arguments``
+    roles: Tuple[str, ...]
+    label: str
+    # declared with compare=False, i.e. the score is ignored when comparing annotations
+    score: float = field(default=1.0, compare=False)
+
+    def __post_init__(self) -> None:
+        """Check and normalize ``arguments`` and ``roles``, then validate label and score."""
+        _post_init_arguments_and_roles(self)
+        _post_init_single_label(self)
+
+    def resolve(self) -> Any:
+        """Return ``(label, ((role, resolved argument), ...))``."""
+        roles_with_resolved_arguments = tuple(
+            (role, argument.resolve()) for argument, role in zip(self.arguments, self.roles)
+        )
+        return self.label, roles_with_resolved_arguments
 
 
 @dataclasses.dataclass(eq=True, frozen=True)

diff --git a/src/pie_modules/document/processing/__init__.py b/src/pie_modules/document/processing/__init__.py
@@ -7,5 +7,4 @@
 from .tokenization import (
     text_based_document_to_token_based,
     token_based_document_to_text_based,
-    tokenize_document,
 )
diff --git a/src/pie_modules/document/processing/text_span_trimmer.py b/src/pie_modules/document/processing/text_span_trimmer.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import TypeVar
+from typing import Any, Dict, TypeVar
 
 from pie_core import AnnotationLayer, Document
 
@@ -45,6 +45,7 @@ def trim_text_spans(
 
     text = spans.target
 
+    original_kwargs: dict[str, Any]
     for span in spans:
         if isinstance(span, Span):
             starts_and_ends = [(span.start, span.end)]
@@ -99,6 +100,7 @@ def trim_text_spans(
                     )
                 removed_span_ids.append(span._id)
                 continue
+        new_kwargs: dict[str, Any]
         if isinstance(span, Span):
             if not len(new_starts_and_ends) == 1:
                 raise ValueError(f"Expected one span, got {len(new_starts_and_ends)}")