From 547e5081d0b98b79344d8ca5c321fe55ab4ec981 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sun, 27 Apr 2025 14:08:14 +0200 Subject: [PATCH 01/11] remove pytorch-ie in favor of pie-core (remove model and taskmodule implementations; add annotations and documents; minor fixes) --- pyproject.toml | 24 +- src/pie_modules/annotations.py | 200 +- .../document/processing/sentence_splitter.py | 1 + .../document/processing/text_span_trimmer.py | 4 +- .../document/processing/tokenization.py | 11 +- src/pie_modules/documents.py | 188 +- .../relation_argument_distance_collector.py | 2 +- .../metrics/span_length_collector.py | 2 +- src/pie_modules/models/__init__.py | 12 - .../models/base_models/__init__.py | 2 - .../base_models/bart_as_pointer_network.py | 476 --- .../bart_with_decoder_position_ids.py | 536 --- src/pie_modules/models/common/__init__.py | 4 - .../models/common/has_taskmodule.py | 20 - .../models/common/model_with_boilerplate.py | 80 - .../model_with_metrics_from_taskmodule.py | 152 - src/pie_modules/models/common/stages.py | 3 - src/pie_modules/models/components/__init__.py | 0 .../models/components/pointer_head.py | 357 -- src/pie_modules/models/components/pooler.py | 274 -- .../models/components/seq2seq_encoder.py | 77 - src/pie_modules/models/interface.py | 12 - .../sequence_classification_with_pooler.py | 362 -- .../simple_extractive_question_answering.py | 165 - src/pie_modules/models/simple_generative.py | 196 - .../models/simple_sequence_classification.py | 140 - .../models/simple_token_classification.py | 97 - .../models/span_tuple_classification.py | 457 --- ...sification_with_seq2seq_encoder_and_crf.py | 247 -- src/pie_modules/taskmodules/__init__.py | 11 - .../taskmodules/common/__init__.py | 4 - .../taskmodules/common/interfaces.py | 63 - src/pie_modules/taskmodules/common/mixins.py | 297 -- .../taskmodule_with_document_converter.py | 117 - src/pie_modules/taskmodules/common/utils.py | 32 - .../taskmodules/cross_text_binary_coref.py | 292 -- 
.../extractive_question_answering.py | 239 -- ...span_extraction_by_token_classification.py | 468 --- .../taskmodules/metrics/__init__.py | 7 - src/pie_modules/taskmodules/metrics/common.py | 38 - ...n_recall_and_f1_for_labeled_annotations.py | 137 - ...unbatch_and_decode_with_errors_function.py | 147 - .../wrapped_metric_with_prepare_function.py | 129 - .../taskmodules/pointer_network/__init__.py | 0 .../annotation_encoder_decoder.py | 397 --- .../pointer_network/logits_processor.py | 67 - .../pointer_network_for_end2end_re.py | 865 ----- .../re_span_pair_classification.py | 829 ----- .../re_text_classification_with_indices.py | 1508 -------- src/pie_modules/taskmodules/text_to_text.py | 458 --- .../test_relation_argument_sorter.py | 4 +- .../document/processing/test_tokenization.py | 4 +- tests/metrics/test_span_length_collector.py | 2 +- tests/models/__init__.py | 2 - tests/models/base_models/__init__.py | 0 .../test_bart_as_pointer_network.py | 983 ----- .../test_bart_with_decoder_position_ids.py | 301 -- tests/models/components/__init__.py | 0 tests/models/components/test_pointer_head.py | 713 ---- tests/models/components/test_pooler.py | 218 -- .../models/components/test_seq2seq_encoder.py | 93 - .../test_extractive_question_answering.py | 245 -- ...est_sequence_classification_with_pooler.py | 578 --- ...uence_pair_similarity_model_with_pooler.py | 326 -- tests/models/test_simple_generative.py | 439 --- .../test_simple_sequence_classification.py | 550 --- .../test_simple_token_classification.py | 453 --- .../models/test_span_tuple_classification.py | 549 --- ...sification_with_seq2seq_encoder_and_crf.py | 639 ---- tests/taskmodules/__init__.py | 0 tests/taskmodules/common/__init__.py | 0 tests/taskmodules/common/test_interfaces.py | 27 - tests/taskmodules/common/test_mixins.py | 166 - ...test_taskmodule_with_document_converter.py | 163 - tests/taskmodules/common/test_utils.py | 17 - tests/taskmodules/metrics/__init__.py | 0 
...n_recall_and_f1_for_labeled_annotations.py | 151 - ...unbatch_and_decode_with_errors_function.py | 144 - ...st_wrapped_metric_with_prepare_function.py | 137 - tests/taskmodules/pointer_network/__init__.py | 0 .../test_annotation_encoder_decoder.py | 441 --- .../pointer_network/test_logits_processor.py | 80 - .../test_cross_text_binary_coref.py | 395 --- .../test_extractive_question_answering.py | 277 -- ...span_extraction_by_token_classification.py | 883 ----- .../test_pointer_network_for_end2end_re.py | 1313 ------- .../test_re_span_pair_classification.py | 614 ---- ...est_re_text_classification_with_indices.py | 3159 ----------------- tests/taskmodules/test_text2text.py | 275 -- .../test_text2text_with_guidance.py | 240 -- 90 files changed, 370 insertions(+), 24417 deletions(-) delete mode 100644 src/pie_modules/models/__init__.py delete mode 100644 src/pie_modules/models/base_models/__init__.py delete mode 100644 src/pie_modules/models/base_models/bart_with_decoder_position_ids.py delete mode 100644 src/pie_modules/models/common/__init__.py delete mode 100644 src/pie_modules/models/common/has_taskmodule.py delete mode 100644 src/pie_modules/models/common/model_with_boilerplate.py delete mode 100644 src/pie_modules/models/common/model_with_metrics_from_taskmodule.py delete mode 100644 src/pie_modules/models/common/stages.py delete mode 100644 src/pie_modules/models/components/__init__.py delete mode 100644 src/pie_modules/models/components/pointer_head.py delete mode 100644 src/pie_modules/models/components/seq2seq_encoder.py delete mode 100644 src/pie_modules/models/interface.py delete mode 100644 src/pie_modules/models/sequence_classification_with_pooler.py delete mode 100644 src/pie_modules/models/simple_extractive_question_answering.py delete mode 100644 src/pie_modules/models/simple_generative.py delete mode 100644 src/pie_modules/models/simple_sequence_classification.py delete mode 100644 src/pie_modules/models/simple_token_classification.py delete mode 
100644 src/pie_modules/models/span_tuple_classification.py delete mode 100644 src/pie_modules/models/token_classification_with_seq2seq_encoder_and_crf.py delete mode 100644 src/pie_modules/taskmodules/__init__.py delete mode 100644 src/pie_modules/taskmodules/common/interfaces.py delete mode 100644 src/pie_modules/taskmodules/common/mixins.py delete mode 100644 src/pie_modules/taskmodules/common/taskmodule_with_document_converter.py delete mode 100644 src/pie_modules/taskmodules/common/utils.py delete mode 100644 src/pie_modules/taskmodules/extractive_question_answering.py delete mode 100644 src/pie_modules/taskmodules/labeled_span_extraction_by_token_classification.py delete mode 100644 src/pie_modules/taskmodules/metrics/__init__.py delete mode 100644 src/pie_modules/taskmodules/metrics/common.py delete mode 100644 src/pie_modules/taskmodules/metrics/precision_recall_and_f1_for_labeled_annotations.py delete mode 100644 src/pie_modules/taskmodules/metrics/wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py delete mode 100644 src/pie_modules/taskmodules/metrics/wrapped_metric_with_prepare_function.py delete mode 100644 src/pie_modules/taskmodules/pointer_network/__init__.py delete mode 100644 src/pie_modules/taskmodules/pointer_network/annotation_encoder_decoder.py delete mode 100644 src/pie_modules/taskmodules/pointer_network/logits_processor.py delete mode 100644 src/pie_modules/taskmodules/pointer_network_for_end2end_re.py delete mode 100644 src/pie_modules/taskmodules/re_span_pair_classification.py delete mode 100644 src/pie_modules/taskmodules/re_text_classification_with_indices.py delete mode 100644 src/pie_modules/taskmodules/text_to_text.py delete mode 100644 tests/models/__init__.py delete mode 100644 tests/models/base_models/__init__.py delete mode 100644 tests/models/base_models/test_bart_as_pointer_network.py delete mode 100644 tests/models/base_models/test_bart_with_decoder_position_ids.py delete mode 100644 
tests/models/components/__init__.py delete mode 100644 tests/models/components/test_pointer_head.py delete mode 100644 tests/models/components/test_seq2seq_encoder.py delete mode 100644 tests/models/test_extractive_question_answering.py delete mode 100644 tests/models/test_sequence_classification_with_pooler.py delete mode 100644 tests/models/test_sequence_pair_similarity_model_with_pooler.py delete mode 100644 tests/models/test_simple_generative.py delete mode 100644 tests/models/test_simple_sequence_classification.py delete mode 100644 tests/models/test_simple_token_classification.py delete mode 100644 tests/models/test_span_tuple_classification.py delete mode 100644 tests/models/test_token_classification_with_seq2seq_encoder_and_crf.py delete mode 100644 tests/taskmodules/__init__.py delete mode 100644 tests/taskmodules/common/__init__.py delete mode 100644 tests/taskmodules/common/test_interfaces.py delete mode 100644 tests/taskmodules/common/test_mixins.py delete mode 100644 tests/taskmodules/common/test_taskmodule_with_document_converter.py delete mode 100644 tests/taskmodules/common/test_utils.py delete mode 100644 tests/taskmodules/metrics/__init__.py delete mode 100644 tests/taskmodules/metrics/test_precision_recall_and_f1_for_labeled_annotations.py delete mode 100644 tests/taskmodules/metrics/test_wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py delete mode 100644 tests/taskmodules/metrics/test_wrapped_metric_with_prepare_function.py delete mode 100644 tests/taskmodules/pointer_network/__init__.py delete mode 100644 tests/taskmodules/pointer_network/test_annotation_encoder_decoder.py delete mode 100644 tests/taskmodules/pointer_network/test_logits_processor.py delete mode 100644 tests/taskmodules/test_cross_text_binary_coref.py delete mode 100644 tests/taskmodules/test_extractive_question_answering.py delete mode 100644 tests/taskmodules/test_labeled_span_extraction_by_token_classification.py delete mode 100644 
tests/taskmodules/test_pointer_network_for_end2end_re.py delete mode 100644 tests/taskmodules/test_re_span_pair_classification.py delete mode 100644 tests/taskmodules/test_re_text_classification_with_indices.py delete mode 100644 tests/taskmodules/test_text2text.py delete mode 100644 tests/taskmodules/test_text2text_with_guidance.py diff --git a/pyproject.toml b/pyproject.toml index 9af0c843c..4c5e84c4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pie-modules" -version = "0.15.9" -description = "Model and Taskmodule implementations for PyTorch-IE" +version = "0.15.6" +description = "Utility modules for Python-IE" authors = ["Arne Binder "] readme = "README.md" homepage = "https://github.com/arnebinder/pie-modules" @@ -24,22 +24,16 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.9" -# TODO: remove and use pie-core instead -pytorch-ie = ">=0.31.9,<0.32.0" -pytorch-lightning = "^2.1.0" -torchmetrics = "^1" -# >=4.35 because of BartModelWithDecoderPositionIds, <4.37 because of generation config -# created from model config in BartAsPointerNetwork -transformers = ">=4.35.0,<4.37.0" +pie-core = ">=0.1.2,<0.2.0" +pandas = ">=2.0.3,<3.0.0" +# required for AutoTokenizer, PreTrainedTokenizer, BatchEncoding +transformers = ">=4.0.0,<5.0.0" [tool.poetry.group.dev.dependencies] -torch = {version = "^2.1.0+cpu", source = "pytorch"} pytest = "^7.4.2" pytest-cov = "^4.1.0" pre-commit = "^3.4.0" tabulate = "^0.9" -# for TokenClassificationModelWithSeq2SeqEncoderAndCrf -pytorch-crf = ">=0.7.2" # for rouge metric (tests only) and for NltkSentenceSplitter nltk = "^3.8.1" # for NltkSentenceSplitter @@ -50,17 +44,11 @@ networkx = "^3.0.0" [tool.poetry.group.dev] optional = true -[[tool.poetry.source]] -name = "pytorch" -url = "https://download.pytorch.org/whl/cpu" -priority = "explicit" - [[tool.poetry.source]] name = "pre-release" url = "https://test.pypi.org/simple/" priority = "explicit" - 
[tool.pytest.ini_options] addopts = [ "--color=yes", diff --git a/src/pie_modules/annotations.py b/src/pie_modules/annotations.py index d7710595f..dfdbb586a 100644 --- a/src/pie_modules/annotations.py +++ b/src/pie_modules/annotations.py @@ -1,22 +1,192 @@ import dataclasses -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Optional, Tuple from pie_core import Annotation -# re-export all annotations from pytorch_ie to have a single entry point -from pytorch_ie.annotations import ( - BinaryRelation, - Label, - LabeledMultiSpan, - LabeledSpan, - MultiLabel, - MultiLabeledBinaryRelation, - MultiLabeledSpan, - MultiSpan, - NaryRelation, - Span, - _post_init_single_label, -) + +def _post_init_single_label(self): + if not isinstance(self.label, str): + raise ValueError("label must be a single string.") + + if not isinstance(self.score, float): + raise ValueError("score must be a single float.") + + +def _post_init_multi_label(self): + if self.score is None: + score = tuple([1.0] * len(self.label)) + object.__setattr__(self, "score", score) + + if not isinstance(self.label, tuple): + object.__setattr__(self, "label", tuple(self.label)) + + if not isinstance(self.score, tuple): + object.__setattr__(self, "score", tuple(self.score)) + + if len(self.label) != len(self.score): + raise ValueError( + f"Number of labels ({len(self.label)}) and scores ({len(self.score)}) must be equal." 
+ ) + + +def _post_init_multi_span(self): + if isinstance(self.slices, list): + object.__setattr__(self, "slices", tuple(tuple(s) for s in self.slices)) + + +def _post_init_arguments_and_roles(self): + if len(self.arguments) != len(self.roles): + raise ValueError( + f"Number of arguments ({len(self.arguments)}) and roles ({len(self.roles)}) must be equal" + ) + if not isinstance(self.arguments, tuple): + object.__setattr__(self, "arguments", tuple(self.arguments)) + if not isinstance(self.roles, tuple): + object.__setattr__(self, "roles", tuple(self.roles)) + + +@dataclass(eq=True, frozen=True) +class Label(Annotation): + label: str + score: float = field(default=1.0, compare=False) + + def __post_init__(self) -> None: + _post_init_single_label(self) + + def resolve(self) -> Any: + return self.label + + +@dataclass(eq=True, frozen=True) +class MultiLabel(Annotation): + label: Tuple[str, ...] + score: Optional[Tuple[float, ...]] = field(default=None, compare=False) + + def __post_init__(self) -> None: + _post_init_multi_label(self) + + def resolve(self) -> Any: + return self.label + + +@dataclass(eq=True, frozen=True) +class Span(Annotation): + start: int + end: int + + def __str__(self) -> str: + if not self.is_attached: + return super().__str__() + return str(self.target[self.start : self.end]) + + def resolve(self) -> Any: + if self.is_attached: + return self.target[self.start : self.end] + else: + raise ValueError(f"{self} is not attached to a target.") + + +@dataclass(eq=True, frozen=True) +class LabeledSpan(Span): + label: str + score: float = field(default=1.0, compare=False) + + def __post_init__(self) -> None: + _post_init_single_label(self) + + def resolve(self) -> Any: + return self.label, super().resolve() + + +@dataclass(eq=True, frozen=True) +class MultiLabeledSpan(Span): + label: Tuple[str, ...] 
+ score: Optional[Tuple[float, ...]] = field(default=None, compare=False) + + def __post_init__(self) -> None: + _post_init_multi_label(self) + + def resolve(self) -> Any: + return self.label, super().resolve() + + +@dataclass(eq=True, frozen=True) +class MultiSpan(Annotation): + slices: Tuple[Tuple[int, int], ...] + + def __post_init__(self) -> None: + _post_init_multi_span(self) + + def __str__(self) -> str: + if not self.is_attached: + return super().__str__() + return str(tuple(self.target[start:end] for start, end in self.slices)) + + def resolve(self) -> Any: + if self.is_attached: + return tuple(self.target[start:end] for start, end in self.slices) + else: + raise ValueError(f"{self} is not attached to a target.") + + +@dataclass(eq=True, frozen=True) +class LabeledMultiSpan(MultiSpan): + label: str + score: float = field(default=1.0, compare=False) + + def __post_init__(self) -> None: + super().__post_init__() + _post_init_single_label(self) + + def resolve(self) -> Any: + return self.label, super().resolve() + + +@dataclass(eq=True, frozen=True) +class BinaryRelation(Annotation): + head: Annotation + tail: Annotation + label: str + score: float = field(default=1.0, compare=False) + + def __post_init__(self) -> None: + _post_init_single_label(self) + + def resolve(self) -> Any: + return self.label, (self.head.resolve(), self.tail.resolve()) + + +@dataclass(eq=True, frozen=True) +class MultiLabeledBinaryRelation(Annotation): + head: Annotation + tail: Annotation + label: Tuple[str, ...] + score: Optional[Tuple[float, ...]] = field(default=None, compare=False) + + def __post_init__(self) -> None: + _post_init_multi_label(self) + + def resolve(self) -> Any: + return self.label, (self.head.resolve(), self.tail.resolve()) + + +@dataclass(eq=True, frozen=True) +class NaryRelation(Annotation): + arguments: Tuple[Annotation, ...] + roles: Tuple[str, ...] 
+ label: str + score: float = field(default=1.0, compare=False) + + def __post_init__(self) -> None: + _post_init_arguments_and_roles(self) + _post_init_single_label(self) + + def resolve(self) -> Any: + return ( + self.label, + tuple((role, arg.resolve()) for arg, role in zip(self.arguments, self.roles)), + ) @dataclasses.dataclass(eq=True, frozen=True) diff --git a/src/pie_modules/document/processing/sentence_splitter.py b/src/pie_modules/document/processing/sentence_splitter.py index 5230e2d19..14e1b0e7d 100644 --- a/src/pie_modules/document/processing/sentence_splitter.py +++ b/src/pie_modules/document/processing/sentence_splitter.py @@ -46,6 +46,7 @@ def __init__( self.inplace = inplace # download the NLTK Punkt tokenizer model nltk.download("punkt") + nltk.download("punkt_tab") self.sentencizer = nltk.data.load(sentencizer_url) def __call__(self, document: D) -> D: diff --git a/src/pie_modules/document/processing/text_span_trimmer.py b/src/pie_modules/document/processing/text_span_trimmer.py index 412e01110..eb27a58f4 100644 --- a/src/pie_modules/document/processing/text_span_trimmer.py +++ b/src/pie_modules/document/processing/text_span_trimmer.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import TypeVar +from typing import Any, Dict, TypeVar from pie_core import AnnotationLayer, Document @@ -45,6 +45,7 @@ def trim_text_spans( text = spans.target + original_kwargs: dict[str, Any] for span in spans: if isinstance(span, Span): starts_and_ends = [(span.start, span.end)] @@ -99,6 +100,7 @@ def trim_text_spans( ) removed_span_ids.append(span._id) continue + new_kwargs: dict[str, Any] if isinstance(span, Span): if not len(new_starts_and_ends) == 1: raise ValueError(f"Expected one span, got {len(new_starts_and_ends)}") diff --git a/src/pie_modules/document/processing/tokenization.py b/src/pie_modules/document/processing/tokenization.py index 12a8a6be3..d99da6fbd 100644 --- a/src/pie_modules/document/processing/tokenization.py +++ 
b/src/pie_modules/document/processing/tokenization.py @@ -18,7 +18,6 @@ from pie_core import Annotation from pie_core.utils.hydra import resolve_type -from transformers import PreTrainedTokenizer from pie_modules.annotations import MultiSpan, Span from pie_modules.documents import TextBasedDocument, TokenBasedDocument @@ -105,13 +104,13 @@ def char_span_to_token_span( f"The first target of a text targeting span must be a string, but found {type(base_text)} as first " f"target type. Can not convert the span {span}." ) - stripped_slices = [ - get_stripped_offsets(start, end, base_text) for start, end in span.slices - ] + stripped_slices = tuple( + [get_stripped_offsets(start, end, base_text) for start, end in span.slices] + ) else: stripped_slices = span.slices # remove empty and invalid slices - stripped_slices = [(start, end) for start, end in stripped_slices if start < end] + stripped_slices = tuple([(start, end) for start, end in stripped_slices if start < end]) if len(stripped_slices) == 0: return None slices_inclusive_end = [ @@ -453,7 +452,7 @@ def token_based_document_to_text_based( def tokenize_document( doc: TextBasedDocument, - tokenizer: PreTrainedTokenizer, + tokenizer: Callable, result_document_type: Type[ToD], partition_layer: Optional[str] = None, strip_spans: bool = False, diff --git a/src/pie_modules/documents.py b/src/pie_modules/documents.py index bf07de932..326621cdb 100644 --- a/src/pie_modules/documents.py +++ b/src/pie_modules/documents.py @@ -1,29 +1,8 @@ import dataclasses +from typing import Any, Dict, Optional, Tuple -from pie_core import AnnotationLayer, annotation_field - -# re-export all documents from pytorch_ie to have a single entry point -from pytorch_ie.documents import ( - TextBasedDocument, - TextDocumentWithLabel, - TextDocumentWithLabeledMultiSpans, - TextDocumentWithLabeledMultiSpansAndBinaryRelations, - TextDocumentWithLabeledMultiSpansAndLabeledPartitions, - 
TextDocumentWithLabeledMultiSpansBinaryRelationsAndLabeledPartitions, - TextDocumentWithLabeledPartitions, - TextDocumentWithLabeledSpans, - TextDocumentWithLabeledSpansAndBinaryRelations, - TextDocumentWithLabeledSpansAndLabeledPartitions, - TextDocumentWithLabeledSpansAndSentences, - TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions, - TextDocumentWithMultiLabel, - TextDocumentWithSentences, - TextDocumentWithSpans, - TextDocumentWithSpansAndBinaryRelations, - TextDocumentWithSpansAndLabeledPartitions, - TextDocumentWithSpansBinaryRelationsAndLabeledPartitions, - TokenBasedDocument, -) +from pie_core import AnnotationLayer, Document, annotation_field +from typing_extensions import TypeAlias from pie_modules.annotations import ( AbstractiveSummary, @@ -31,12 +10,172 @@ BinaryRelation, ExtractiveAnswer, GenerativeAnswer, + Label, LabeledMultiSpan, LabeledSpan, + MultiLabel, Question, + Span, ) +@dataclasses.dataclass +class WithMetadata: + id: Optional[str] = None + metadata: Dict[str, Any] = dataclasses.field(default_factory=dict) + + +@dataclasses.dataclass +class WithTokens: + tokens: Tuple[str, ...] + + +@dataclasses.dataclass +class WithText: + text: str + + +@dataclasses.dataclass +class TextBasedDocument(WithMetadata, WithText, Document): + pass + + +@dataclasses.dataclass +class TokenBasedDocument(WithMetadata, WithTokens, Document): + def __post_init__(self) -> None: + + # When used in a dataset, the document gets serialized to json like structure which does not know tuples, + # so they get converted to lists. This is a workaround to automatically convert the "tokens" back to tuples + # when the document is created from a dataset. 
+ if isinstance(self.tokens, list): + object.__setattr__(self, "tokens", tuple(self.tokens)) + elif not isinstance(self.tokens, tuple): + raise ValueError("tokens must be a tuple.") + + # Call the default document construction code + super().__post_init__() + + +# backwards compatibility +TextDocument: TypeAlias = TextBasedDocument + + +@dataclasses.dataclass +class DocumentWithLabel(Document): + label: AnnotationLayer[Label] = annotation_field() + + +@dataclasses.dataclass +class DocumentWithMultiLabel(Document): + label: AnnotationLayer[MultiLabel] = annotation_field() + + +@dataclasses.dataclass +class TextDocumentWithLabel(DocumentWithLabel, TextBasedDocument): + pass + + +@dataclasses.dataclass +class TextDocumentWithMultiLabel(DocumentWithMultiLabel, TextBasedDocument): + pass + + +@dataclasses.dataclass +class TextDocumentWithLabeledPartitions(TextBasedDocument): + labeled_partitions: AnnotationLayer[LabeledSpan] = annotation_field(target="text") + + +@dataclasses.dataclass +class TextDocumentWithSentences(TextBasedDocument): + sentences: AnnotationLayer[Span] = annotation_field(target="text") + + +@dataclasses.dataclass +class TextDocumentWithSpans(TextBasedDocument): + spans: AnnotationLayer[Span] = annotation_field(target="text") + + +@dataclasses.dataclass +class TextDocumentWithLabeledSpans(TextBasedDocument): + labeled_spans: AnnotationLayer[LabeledSpan] = annotation_field(target="text") + + +@dataclasses.dataclass +class TextDocumentWithLabeledSpansAndLabeledPartitions( + TextDocumentWithLabeledSpans, TextDocumentWithLabeledPartitions +): + pass + + +@dataclasses.dataclass +class TextDocumentWithLabeledSpansAndSentences( + TextDocumentWithLabeledSpans, TextDocumentWithSentences +): + pass + + +@dataclasses.dataclass +class TextDocumentWithLabeledSpansAndBinaryRelations(TextDocumentWithLabeledSpans): + binary_relations: AnnotationLayer[BinaryRelation] = annotation_field(target="labeled_spans") + + +@dataclasses.dataclass +class 
TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions( + TextDocumentWithLabeledSpansAndLabeledPartitions, + TextDocumentWithLabeledSpansAndBinaryRelations, + TextDocumentWithLabeledPartitions, +): + pass + + +@dataclasses.dataclass +class TextDocumentWithSpansAndBinaryRelations(TextDocumentWithSpans): + binary_relations: AnnotationLayer[BinaryRelation] = annotation_field(target="spans") + + +@dataclasses.dataclass +class TextDocumentWithSpansAndLabeledPartitions( + TextDocumentWithSpans, TextDocumentWithLabeledPartitions +): + pass + + +@dataclasses.dataclass +class TextDocumentWithSpansBinaryRelationsAndLabeledPartitions( + TextDocumentWithSpansAndLabeledPartitions, + TextDocumentWithSpansAndBinaryRelations, + TextDocumentWithLabeledPartitions, +): + pass + + +@dataclasses.dataclass +class TextDocumentWithLabeledMultiSpans(TextBasedDocument): + labeled_multi_spans: AnnotationLayer[LabeledMultiSpan] = annotation_field(target="text") + + +@dataclasses.dataclass +class TextDocumentWithLabeledMultiSpansAndLabeledPartitions( + TextDocumentWithLabeledMultiSpans, TextDocumentWithLabeledPartitions +): + pass + + +@dataclasses.dataclass +class TextDocumentWithLabeledMultiSpansAndBinaryRelations(TextDocumentWithLabeledMultiSpans): + binary_relations: AnnotationLayer[BinaryRelation] = annotation_field( + target="labeled_multi_spans" + ) + + +@dataclasses.dataclass +class TextDocumentWithLabeledMultiSpansBinaryRelationsAndLabeledPartitions( + TextDocumentWithLabeledMultiSpansAndLabeledPartitions, + TextDocumentWithLabeledMultiSpansAndBinaryRelations, +): + pass + + @dataclasses.dataclass class TextDocumentWithQuestionsAndExtractiveAnswers(TextBasedDocument): """A text based PIE document with annotations for extractive question answering.""" @@ -66,7 +205,6 @@ class TokenDocumentWithQuestionsAndExtractiveAnswers(TokenBasedDocument): # backwards compatibility ExtractiveQADocument = TextDocumentWithQuestionsAndExtractiveAnswers TokenizedExtractiveQADocument = 
TokenDocumentWithQuestionsAndExtractiveAnswers -TextDocument = TextBasedDocument @dataclasses.dataclass diff --git a/src/pie_modules/metrics/relation_argument_distance_collector.py b/src/pie_modules/metrics/relation_argument_distance_collector.py index 935c11cbc..ff0cdeb1c 100644 --- a/src/pie_modules/metrics/relation_argument_distance_collector.py +++ b/src/pie_modules/metrics/relation_argument_distance_collector.py @@ -82,7 +82,7 @@ def _collect(self, doc: Document) -> Dict[str, List[float]]: for binary_relation in layer_obj: if isinstance(binary_relation, BinaryRelation): - args = [binary_relation.head, binary_relation.tail] + args = (binary_relation.head, binary_relation.tail) label = binary_relation.label elif isinstance(binary_relation, NaryRelation): args = binary_relation.arguments diff --git a/src/pie_modules/metrics/span_length_collector.py b/src/pie_modules/metrics/span_length_collector.py index 8acac1399..66b0a8db7 100644 --- a/src/pie_modules/metrics/span_length_collector.py +++ b/src/pie_modules/metrics/span_length_collector.py @@ -1,6 +1,6 @@ import logging from collections import defaultdict -from typing import Any, Callable, Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type, Union from pie_core import Document, DocumentStatistic from pie_core.utils.hydra import resolve_type diff --git a/src/pie_modules/models/__init__.py b/src/pie_modules/models/__init__.py deleted file mode 100644 index df8f4a035..000000000 --- a/src/pie_modules/models/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from .sequence_classification_with_pooler import ( - SequenceClassificationModelWithPooler, - SequencePairSimilarityModelWithPooler, -) -from .simple_extractive_question_answering import SimpleExtractiveQuestionAnsweringModel -from .simple_generative import SimpleGenerativeModel -from .simple_sequence_classification import SimpleSequenceClassificationModel -from .simple_token_classification import SimpleTokenClassificationModel -from 
.span_tuple_classification import SpanTupleClassificationModel -from .token_classification_with_seq2seq_encoder_and_crf import ( - TokenClassificationModelWithSeq2SeqEncoderAndCrf, -) diff --git a/src/pie_modules/models/base_models/__init__.py b/src/pie_modules/models/base_models/__init__.py deleted file mode 100644 index 8bc2cf097..000000000 --- a/src/pie_modules/models/base_models/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .bart_as_pointer_network import BartAsPointerNetwork -from .bart_with_decoder_position_ids import BartModelWithDecoderPositionIds diff --git a/src/pie_modules/models/base_models/bart_as_pointer_network.py b/src/pie_modules/models/base_models/bart_as_pointer_network.py index b17540263..e69de29bb 100644 --- a/src/pie_modules/models/base_models/bart_as_pointer_network.py +++ b/src/pie_modules/models/base_models/bart_as_pointer_network.py @@ -1,476 +0,0 @@ -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union - -import torch.utils.checkpoint -from torch import nn -from torch.nn import Parameter -from torch.optim import Optimizer -from transformers import BartConfig, BartModel, BartPreTrainedModel, GenerationConfig -from transformers.modeling_outputs import Seq2SeqLMOutput, Seq2SeqModelOutput -from transformers.models.bart.modeling_bart import shift_tokens_right -from transformers.utils import logging - -from pie_modules.models.base_models.bart_with_decoder_position_ids import ( - BartModelWithDecoderPositionIds, -) -from pie_modules.models.components.pointer_head import PointerHead - -logger = logging.get_logger(__name__) - - -def get_layer_norm_parameters( - named_parameters: Iterator[Tuple[str, Parameter]], -) -> Iterator[Parameter]: - return ( - param for name, param in named_parameters if "layernorm" in name or "layer_norm" in name - ) - - -def get_non_layer_norm_parameters( - named_parameters: Iterator[Tuple[str, Parameter]], -) -> Iterator[Parameter]: - return ( - param - for name, param in named_parameters - if not 
("layernorm" in name or "layer_norm" in name) - ) - - -class BartAsPointerNetworkConfig(BartConfig): - def __init__( - self, - # respective token ids for the label-, eos-, and pad ids. Can be used as a mapping from the - # target ids to the token ids. - target_token_ids: Optional[List[int]] = None, - # token id mapping to better initialize the label embedding weights - embedding_weight_mapping: Optional[Dict[Union[int, str], List[int]]] = None, - # special decoder position id handling - decoder_position_id_mode: Optional[str] = None, - decoder_position_id_pattern: Optional[List[int]] = None, - decoder_position_id_mapping: Optional[Dict[int, int]] = None, - # other parameters - use_encoder_mlp: bool = True, - use_constraints_encoder_mlp: bool = False, - # optimizer - lr: float = 5e-5, - task_lr: Optional[float] = None, - weight_decay: float = 1e-2, - head_decay: Optional[float] = None, - shared_decay: Optional[float] = None, - encoder_layer_norm_decay: Optional[float] = 0.001, - decoder_layer_norm_decay: Optional[float] = None, - # other BartConfig parameters - **kwargs, - ): - super().__init__(**kwargs) - - self.target_token_ids = target_token_ids - - self.embedding_weight_mapping = embedding_weight_mapping - - self.use_encoder_mlp = use_encoder_mlp - self.use_constraints_encoder_mlp = use_constraints_encoder_mlp - - self.decoder_position_id_mode = decoder_position_id_mode - self.decoder_position_id_pattern = decoder_position_id_pattern - self.decoder_position_id_mapping = decoder_position_id_mapping - - self.lr = lr - self.task_lr = task_lr - self.weight_decay = weight_decay - self.head_decay = head_decay - self.shared_decay = shared_decay - self.encoder_layer_norm_decay = encoder_layer_norm_decay - self.decoder_layer_norm_decay = decoder_layer_norm_decay - - -class BartAsPointerNetwork(BartPreTrainedModel): - config_class = BartAsPointerNetworkConfig - base_model_prefix = "model" - _tied_weights_keys = [ - "encoder.embed_tokens.weight", - 
"decoder.embed_tokens.weight", - ] - - def __init__(self, config: BartAsPointerNetworkConfig): - super().__init__(config) - if self.config.decoder_position_id_mode is not None: - self.model = BartModelWithDecoderPositionIds(config) - else: - self.model = BartModel(config) - - self.pointer_head = PointerHead( - # target space ids - bos_id=self.model.config.bos_token_id, - eos_id=self.model.config.eos_token_id, - pad_id=self.model.config.pad_token_id, - # decoder-input token ids - target_token_ids=self.model.config.target_token_ids, - # embeddings - embeddings=self.model.decoder.embed_tokens, - embedding_weight_mapping=self.model.config.embedding_weight_mapping, - # other parameters - use_encoder_mlp=self.model.config.use_encoder_mlp, - use_constraints_encoder_mlp=self.model.config.use_constraints_encoder_mlp, - decoder_position_id_mode=self.model.config.decoder_position_id_mode, - decoder_position_id_pattern=self.model.config.decoder_position_id_pattern, - decoder_position_id_mapping=self.model.config.decoder_position_id_mapping, - ) - - # Initialize weights and apply final processing - self.post_init() - - @classmethod - def _load_pretrained_model( - cls, - *args, - **kwargs, - ): - ( - model, - missing_keys, - unexpected_keys, - mismatched_keys, - offload_index, - error_msgs, - ) = super()._load_pretrained_model(*args, **kwargs) - # adjust the model after loading the original model (e.g. 
vanilla BartModel) - model.adjust_after_loading_original_model() - return model, missing_keys, unexpected_keys, mismatched_keys, offload_index, error_msgs - - def resize_token_embeddings( - self, new_num_tokens: Optional[int] = None, pad_to_multiple_of: Optional[int] = None - ) -> nn.Embedding: - new_embeddings = super().resize_token_embeddings(new_num_tokens, pad_to_multiple_of) - # we also need to update the embeddings in the pointer head - self.pointer_head.set_embeddings(new_embeddings) - return new_embeddings - - def adjust_after_loading_original_model(self): - # target_token_ids contains all new target tokens for the labels and new tokens were added to the end - # of the vocabulary, so we can use its maximum to resize the embedding weights - self.resize_token_embeddings(new_num_tokens=max(self.config.target_token_ids) + 1) - # initialize the newly added embeddings for the labels with better weights from the original embeddings - self.pointer_head.overwrite_embeddings_with_mapping() - - # adjust generation settings - # set the correct decoder_start_token_id - self.config.decoder_start_token_id = self.config.bos_token_id - # disable ForcedBOSTokenLogitsProcessor - self.config.forced_bos_token_id = None - # disable ForcedEOSTokenLogitsProcessor - self.config.forced_eos_token_id = None - # update the generation config accordingly - self.generation_config = GenerationConfig.from_model_config(self.config) - - def base_model_named_params(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - yield from self.model.named_parameters(prefix=prefix + self.base_model_prefix) - - def head_named_params(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - base_model_param_names = { - name for name, param in self.base_model_named_params(prefix=prefix) - } - for name, param in self.named_parameters(prefix=prefix): - if name not in base_model_param_names: - yield name, param - - def encoder_only_named_params(self, prefix: str = "") -> Iterator[Tuple[str, 
Parameter]]: - shared_params = set(dict(self.encoder_decoder_shared_named_params(prefix=prefix)).values()) - for name, param in self.model.encoder.named_parameters( - prefix=prefix + self.base_model_prefix + ".encoder" - ): - if param not in shared_params: - yield name, param - - def decoder_only_named_params(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - shared_params = set(dict(self.encoder_decoder_shared_named_params(prefix=prefix)).values()) - for name, param in self.model.decoder.named_parameters( - prefix=prefix + self.base_model_prefix + ".decoder" - ): - if param not in shared_params: - yield name, param - - def encoder_decoder_shared_named_params( - self, prefix: str = "" - ) -> Iterator[Tuple[str, Parameter]]: - encoder_params = set(self.model.encoder.parameters()) - decoder_params = set(self.model.decoder.parameters()) - for name, param in self.base_model_named_params(prefix=prefix): - if param in encoder_params and param in decoder_params: - yield name, param - - def get_encoder(self): - return self.model.get_encoder() - - def get_decoder(self): - return self.model.get_decoder() - - # @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) - # @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) - # @add_end_docstrings(BART_GENERATION_EXAMPLE) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - decoder_input_ids: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - decoder_position_ids: Optional[torch.LongTensor] = None, - constraints: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - decoder_head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[List[torch.FloatTensor]] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - 
decoder_inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, Seq2SeqLMOutput]: - r"""Labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): Labels - for computing the masked language modeling loss. Indices should either be in `[0, ..., - config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` - are ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., - config.vocab_size]`. - - Returns: - """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - if labels is not None: - if use_cache: - logger.warning( - "The `use_cache` argument is changed to `False` since `labels` is provided." - ) - use_cache = False - if decoder_input_ids is None and decoder_inputs_embeds is None: - decoder_input_ids = shift_tokens_right( - labels, self.config.pad_token_id, self.config.decoder_start_token_id - ) - - if decoder_input_ids is None: - # we can not create the decoder_input_ids from input_ids, because we need the - # encoder_input_ids for the pointer network - raise ValueError("decoder_input_ids has to be set!") - - # this adjusts the input_ids and, if available, the position_ids - decoder_inputs = self.pointer_head.prepare_decoder_inputs( - input_ids=decoder_input_ids, - # in the case of generation (with past_key_values) the position_ids are already prepared - position_ids=decoder_position_ids, - encoder_input_ids=input_ids, - ) - - model_inputs = dict( - input_ids=input_ids, - encoder_outputs=encoder_outputs, - attention_mask=attention_mask, - decoder_attention_mask=decoder_attention_mask, - head_mask=head_mask, - decoder_head_mask=decoder_head_mask, - cross_attn_head_mask=cross_attn_head_mask, - past_key_values=past_key_values, - 
inputs_embeds=inputs_embeds, - decoder_inputs_embeds=decoder_inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=True, - ) - for k, v in decoder_inputs.items(): - model_inputs[f"decoder_{k}"] = v - outputs = self.model(**model_inputs) - - if not isinstance(outputs, Seq2SeqModelOutput): - raise ValueError( - "Inconsistent output: The output of the model forward should be of type " - f"`Seq2SeqLMOutput`, but is of type `{type(outputs)}`." - ) - logits, loss = self.pointer_head( - last_hidden_state=outputs.last_hidden_state, - encoder_last_hidden_state=outputs.encoder_last_hidden_state, - encoder_input_ids=input_ids, - encoder_attention_mask=attention_mask, - labels=labels, - decoder_attention_mask=decoder_attention_mask, - constraints=constraints, - ) - - if not return_dict: - output = (logits,) + outputs[1:] - return ((loss,) + output) if loss is not None else output - - return Seq2SeqLMOutput( - loss=loss, - logits=logits, - past_key_values=outputs.past_key_values, - decoder_hidden_states=outputs.decoder_hidden_states, - decoder_attentions=outputs.decoder_attentions, - cross_attentions=outputs.cross_attentions, - encoder_last_hidden_state=outputs.encoder_last_hidden_state, - encoder_hidden_states=outputs.encoder_hidden_states, - encoder_attentions=outputs.encoder_attentions, - ) - - def prepare_inputs_for_generation( - self, - decoder_input_ids, - encoder_input_ids, # added for pointer network - encoder_attention_mask, # added for pointer network - past_key_values=None, - attention_mask=None, - decoder_attention_mask=None, - head_mask=None, - decoder_head_mask=None, - cross_attn_head_mask=None, - use_cache=None, - encoder_outputs=None, - **kwargs, - ): - result = {} - if self.pointer_head.use_prepared_position_ids: - # we need to prepare the position ids for the decoder here, because later we do not have the full - # input_ids anymore - result["decoder_position_ids"] = 
self.pointer_head.prepare_decoder_position_ids( - input_ids=decoder_input_ids - ) - - # cut decoder_input_ids if past_key_values is used - if past_key_values is not None: - past_length = past_key_values[0][0].shape[2] - - # Some generation methods already pass only the last input ID - if decoder_input_ids.shape[1] > past_length: - remove_prefix_length = past_length - else: - # Default to old behavior: keep only final ID - remove_prefix_length = decoder_input_ids.shape[1] - 1 - - decoder_input_ids = decoder_input_ids[:, remove_prefix_length:] - - if "decoder_position_ids" in result: - result["decoder_position_ids"] = result["decoder_position_ids"][ - :, remove_prefix_length: - ] - - result.update( - { - "input_ids": encoder_input_ids, - "encoder_outputs": encoder_outputs, - "past_key_values": past_key_values, - "decoder_input_ids": decoder_input_ids, - "attention_mask": encoder_attention_mask, - "decoder_attention_mask": decoder_attention_mask, - "head_mask": head_mask, - "decoder_head_mask": decoder_head_mask, - "cross_attn_head_mask": cross_attn_head_mask, - "use_cache": use_cache, # change this to avoid caching (presumably for debugging) - } - ) - return result - - def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor): - return shift_tokens_right( - labels, self.config.pad_token_id, self.config.decoder_start_token_id - ) - - @staticmethod - def _reorder_cache(past_key_values, beam_idx): - reordered_past = () - for layer_past in past_key_values: - # cached cross_attention states don't have to be reordered -> they are always the same - reordered_past += ( - tuple( - past_state.index_select(0, beam_idx.to(past_state.device)) - for past_state in layer_past[:2] - ) - + layer_past[2:], - ) - return reordered_past - - def _prepare_encoder_decoder_kwargs_for_generation( - self, inputs_tensor: torch.Tensor, model_kwargs, model_input_name: Optional[str] = None - ) -> Dict[str, Any]: - result = super()._prepare_encoder_decoder_kwargs_for_generation( - 
inputs_tensor=inputs_tensor, - model_kwargs=model_kwargs, - model_input_name=model_input_name, - ) - # add items that are needed for pointer network - result["encoder_input_ids"] = inputs_tensor - result["encoder_attention_mask"] = result["attention_mask"] - return result - - def configure_optimizer(self) -> Optimizer: - parameters = [] - - # head parameters - head_decay = ( - self.config.head_decay - if self.config.head_decay is not None - else self.config.weight_decay - ) - params = { - "lr": self.config.task_lr if self.config.task_lr is not None else self.config.lr, - "weight_decay": head_decay, - "params": dict(self.head_named_params()).values(), - } - parameters.append(params) - - # decoder only layer norm parameters - decoder_layer_norm_decay = ( - self.config.decoder_layer_norm_decay - if self.config.decoder_layer_norm_decay is not None - else self.config.weight_decay - ) - params = { - "lr": self.config.lr, - "weight_decay": decoder_layer_norm_decay, - "params": get_layer_norm_parameters(self.decoder_only_named_params()), - } - parameters.append(params) - - # decoder only other parameters - params = { - "lr": self.config.lr, - "weight_decay": self.config.weight_decay, - "params": get_non_layer_norm_parameters(self.decoder_only_named_params()), - } - parameters.append(params) - - # encoder only layer norm parameters - encoder_layer_norm_decay = ( - self.config.encoder_layer_norm_decay - if self.config.encoder_layer_norm_decay is not None - else self.config.weight_decay - ) - params = { - "lr": self.config.lr, - "weight_decay": encoder_layer_norm_decay, - "params": get_layer_norm_parameters(self.encoder_only_named_params()), - } - parameters.append(params) - - # encoder only other parameters - params = { - "lr": self.config.lr, - "weight_decay": self.config.weight_decay, - "params": get_non_layer_norm_parameters(self.encoder_only_named_params()), - } - parameters.append(params) - - # encoder-decoder shared parameters - shared_decay = ( - 
self.config.shared_decay - if self.config.shared_decay is not None - else self.config.weight_decay - ) - params = { - "lr": self.config.lr, - "weight_decay": shared_decay, - "params": dict(self.encoder_decoder_shared_named_params()).values(), - } - parameters.append(params) - - return torch.optim.AdamW(parameters) diff --git a/src/pie_modules/models/base_models/bart_with_decoder_position_ids.py b/src/pie_modules/models/base_models/bart_with_decoder_position_ids.py deleted file mode 100644 index 03ca5dea6..000000000 --- a/src/pie_modules/models/base_models/bart_with_decoder_position_ids.py +++ /dev/null @@ -1,536 +0,0 @@ -# Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""PyTorch BART model, but the decoder accepts predefined position ids. If not provided, the -original logic is used to create the position ids. - -The model is based on the BartModel from Transformers 4.35.0, -i.e. https://github.com/huggingface/transformers/blob/v4.35.0/src/transformers/models/bart/modeling_bart.py. - -Note: This also contains some minor modifications to make the code mypy (v1.4.1) compliant. -. 
-""" -import math -from typing import Any, List, Optional, Tuple, Union - -import torch -import torch.utils.checkpoint -from torch import nn -from transformers.modeling_attn_mask_utils import ( - _prepare_4d_attention_mask, - _prepare_4d_causal_attention_mask, -) -from transformers.modeling_outputs import ( - BaseModelOutput, - BaseModelOutputWithPastAndCrossAttentions, - Seq2SeqModelOutput, -) -from transformers.models.bart import BartConfig -from transformers.models.bart.modeling_bart import ( - _CHECKPOINT_FOR_DOC, - _CONFIG_FOR_DOC, - _EXPECTED_OUTPUT_SHAPE, - BART_INPUTS_DOCSTRING, - BART_START_DOCSTRING, - BartDecoderLayer, - BartEncoder, - BartPreTrainedModel, - shift_tokens_right, -) -from transformers.utils import ( - add_code_sample_docstrings, - add_start_docstrings, - add_start_docstrings_to_model_forward, - logging, -) - -logger = logging.get_logger(__name__) - - -class BartLearnedPositionalEmbeddingWithPositionIds(nn.Embedding): - """This module learns positional embeddings up to a fixed maximum size.""" - - def __init__(self, num_embeddings: int, embedding_dim: int): - # Bart is set up so that if padding_idx is specified then offset the embedding ids by 2 - # and adjust num_embeddings appropriately. Other models don't have this hack - self.offset = 2 - super().__init__(num_embeddings + self.offset, embedding_dim) - - def forward( - self, - input_ids: torch.Tensor, - past_key_values_length: int = 0, - position_ids: Optional[torch.Tensor] = None, - ): - """`input_ids' shape is expected to be [bsz x seqlen].""" - - if position_ids is None: - bsz, seq_len = input_ids.shape[:2] - positions = torch.arange( - past_key_values_length, - past_key_values_length + seq_len, - dtype=torch.long, - device=self.weight.device, - ).expand(bsz, -1) - else: - positions = position_ids - - return super().forward(positions + self.offset) - - -class BartDecoderWithPositionIds(BartPreTrainedModel): - """Transformer decoder consisting of *config.decoder_layers* layers. 
Each layer is a - [`BartDecoderLayer`] - - Args: - config: BartConfig - embed_tokens (nn.Embedding): output embedding - """ - - def __init__(self, config: BartConfig, embed_tokens: Optional[nn.Embedding] = None): - super().__init__(config) - self.dropout = config.dropout - self.layerdrop = config.decoder_layerdrop - self.padding_idx = config.pad_token_id - self.max_target_positions = config.max_position_embeddings - self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0 - - self.embed_tokens = nn.Embedding(config.vocab_size, config.d_model, self.padding_idx) - - if embed_tokens is not None: - self.embed_tokens.weight = embed_tokens.weight - - self.embed_positions = BartLearnedPositionalEmbeddingWithPositionIds( - config.max_position_embeddings, - config.d_model, - ) - self.layers = nn.ModuleList( - [BartDecoderLayer(config) for _ in range(config.decoder_layers)] - ) - self.layernorm_embedding = nn.LayerNorm(config.d_model) - - self.gradient_checkpointing = False - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.embed_tokens - - def set_input_embeddings(self, value): - self.embed_tokens = value - - def forward( - self, - input_ids: torch.LongTensor = None, - position_ids: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None, - encoder_hidden_states: Optional[torch.FloatTensor] = None, - encoder_attention_mask: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]: - r""" - Args: - input_ids (`torch.LongTensor` of shape `(batch_size, 
sequence_length)`): - Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you - provide it. - - Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and - [`PreTrainedTokenizer.__call__`] for details. - - [What are input IDs?](../glossary#input-ids) - position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): - Position indices for each input sequence token that are used to create the position embedding - of the sequence. If `None` (default), position ids are automatically created as sequential - integers (takes previous `past_key_values` into account, if provided). - attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*): - Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention - of the decoder. - encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*): - Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values - selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. 
- - cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the cross-attention modules in the decoder to avoid performing - cross-attention on hidden heads. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of - shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of - shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. - - Contains pre-computed hidden-states (key and values in the self-attention blocks and in the - cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. - - If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those - that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all `decoder_input_ids` of shape `(batch_size, sequence_length)`. - inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): - Optionally, instead of passing `input_ids` you can choose to directly pass an embedded - representation. This is useful if you want more control over how to convert `input_ids` indices - into associated vectors than the model's internal embedding lookup matrix. - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under - returned tensors for more detail. - output_hidden_states (`bool`, *optional*): - Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors - for more detail. 
- return_dict (`bool`, *optional*): - Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. - """ - output_attentions = ( - output_attentions if output_attentions is not None else self.config.output_attentions - ) - output_hidden_states = ( - output_hidden_states - if output_hidden_states is not None - else self.config.output_hidden_states - ) - use_cache = use_cache if use_cache is not None else self.config.use_cache - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - # retrieve input_ids and inputs_embeds - if input_ids is not None and inputs_embeds is not None: - raise ValueError( - "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time" - ) - elif input_ids is not None: - input = input_ids - input_shape = input.shape - input_ids = input_ids.view(-1, input_shape[-1]) - elif inputs_embeds is not None: - input_shape = inputs_embeds.size()[:-1] - input = inputs_embeds[:, :, -1] - else: - raise ValueError( - "You have to specify either decoder_input_ids or decoder_inputs_embeds" - ) - - # past_key_values_length - past_key_values_length = ( - past_key_values[0][0].shape[2] if past_key_values is not None else 0 - ) - - if inputs_embeds is None: - inputs_embeds = self.embed_tokens(input) * self.embed_scale - - if getattr(self.config, "_flash_attn_2_enabled", False): - # 2d mask is passed through the layers - attention_mask = ( - attention_mask if (attention_mask is not None and 0 in attention_mask) else None - ) - else: - # 4d mask is passed through the layers - attention_mask = _prepare_4d_causal_attention_mask( - attention_mask, input_shape, inputs_embeds, past_key_values_length - ) - - # expand encoder attention mask - if encoder_hidden_states is not None and encoder_attention_mask is not None: - if getattr(self.config, "_flash_attn_2_enabled", False): - encoder_attention_mask = ( - encoder_attention_mask if 0 in encoder_attention_mask else None - ) - else: - # [bsz, 
seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - encoder_attention_mask = _prepare_4d_attention_mask( - encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1] - ) - - # embed positions - if position_ids is not None and position_ids.shape != input_shape: - raise ValueError( - f"Position IDs shape {position_ids.shape} does not match input ids shape {input_shape}." - ) - positions = self.embed_positions(input, past_key_values_length, position_ids) - positions = positions.to(inputs_embeds.device) - - hidden_states = inputs_embeds + positions - hidden_states = self.layernorm_embedding(hidden_states) - - hidden_states = nn.functional.dropout( - hidden_states, p=self.dropout, training=self.training - ) - - if self.gradient_checkpointing and self.training: - if use_cache: - logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." - ) - use_cache = False - - # decoder layers - all_hidden_states: Optional[Tuple[Any, ...]] = () if output_hidden_states else None - all_self_attns: Optional[Tuple[Any, ...]] = () if output_attentions else None - all_cross_attentions: Optional[Tuple[Any, ...]] = ( - () if (output_attentions and encoder_hidden_states is not None) else None - ) - next_decoder_cache: Optional[Tuple[Any, ...]] = () if use_cache else None - - # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired - for attn_mask, mask_name in zip( - [head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"] - ): - if attn_mask is not None: - if attn_mask.size()[0] != (len(self.layers)): - raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" - f" {attn_mask.size()[0]}." 
- ) - - for idx, decoder_layer in enumerate(self.layers): - # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - if all_hidden_states is not None: - all_hidden_states += (hidden_states,) - if self.training: - dropout_probability = torch.rand([]) - if dropout_probability < self.layerdrop: - continue - - past_key_value = past_key_values[idx] if past_key_values is not None else None - - if self.gradient_checkpointing and self.training: - layer_outputs = self._gradient_checkpointing_func( - decoder_layer.__call__, - hidden_states, - attention_mask, - encoder_hidden_states, - encoder_attention_mask, - head_mask[idx] if head_mask is not None else None, - cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None, - None, - output_attentions, - use_cache, - ) - else: - layer_outputs = decoder_layer( - hidden_states, - attention_mask=attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - layer_head_mask=(head_mask[idx] if head_mask is not None else None), - cross_attn_layer_head_mask=( - cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None - ), - past_key_value=past_key_value, - output_attentions=output_attentions, - use_cache=use_cache, - ) - hidden_states = layer_outputs[0] - - if next_decoder_cache is not None: - next_decoder_cache += (layer_outputs[3 if output_attentions else 1],) - - if all_self_attns is not None: - all_self_attns += (layer_outputs[1],) - - if all_cross_attentions is not None: - all_cross_attentions += (layer_outputs[2],) - - # add hidden states from the last decoder layer - if all_hidden_states is not None: - all_hidden_states += (hidden_states,) - - next_cache = next_decoder_cache if use_cache else None - if not return_dict: - return tuple( - v - for v in [ - hidden_states, - next_cache, - all_hidden_states, - all_self_attns, - all_cross_attentions, - ] - if v is not None - ) - return BaseModelOutputWithPastAndCrossAttentions( - 
last_hidden_state=hidden_states, - past_key_values=next_cache, - hidden_states=all_hidden_states, - attentions=all_self_attns, - cross_attentions=all_cross_attentions, - ) - - -@add_start_docstrings( - "The bare BART Model outputting raw hidden-states without any specific head on top.", - BART_START_DOCSTRING, -) -class BartModelWithDecoderPositionIds(BartPreTrainedModel): - _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"] - - def __init__(self, config: BartConfig): - super().__init__(config) - - padding_idx, vocab_size = config.pad_token_id, config.vocab_size - self.shared = nn.Embedding(vocab_size, config.d_model, padding_idx) - - self.encoder = BartEncoder(config, self.shared) - self.decoder = BartDecoderWithPositionIds(config, self.shared) - - # Initialize weights and apply final processing - self.post_init() - - def _tie_weights(self): - if self.config.tie_word_embeddings: - self._tie_or_clone_weights(self.encoder.embed_tokens, self.shared) - self._tie_or_clone_weights(self.decoder.embed_tokens, self.shared) - - def get_input_embeddings(self): - return self.shared - - def set_input_embeddings(self, value): - self.shared = value - self.encoder.embed_tokens = self.shared - self.decoder.embed_tokens = self.shared - - def get_encoder(self): - return self.encoder - - def get_decoder(self): - return self.decoder - - @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) - @add_code_sample_docstrings( - checkpoint=_CHECKPOINT_FOR_DOC, - output_type=Seq2SeqModelOutput, - config_class=_CONFIG_FOR_DOC, - expected_output=_EXPECTED_OUTPUT_SHAPE, - ) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - decoder_input_ids: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - decoder_position_ids: Optional[torch.Tensor] = None, - head_mask: Optional[torch.Tensor] = None, - decoder_head_mask: Optional[torch.Tensor] = None, - 
cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[List[torch.FloatTensor]] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - decoder_inputs_embeds: Optional[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, Seq2SeqModelOutput]: - # different to other models, Bart automatically creates decoder_input_ids from - # input_ids if no decoder_input_ids are provided - if decoder_input_ids is None and decoder_inputs_embeds is None: - if input_ids is None: - raise ValueError( - "If no `decoder_input_ids` or `decoder_inputs_embeds` are " - "passed, `input_ids` cannot be `None`. Please pass either " - "`input_ids` or `decoder_input_ids` or `decoder_inputs_embeds`." - ) - - decoder_input_ids = shift_tokens_right( - input_ids, self.config.pad_token_id, self.config.decoder_start_token_id - ) - - output_attentions = ( - output_attentions if output_attentions is not None else self.config.output_attentions - ) - output_hidden_states = ( - output_hidden_states - if output_hidden_states is not None - else self.config.output_hidden_states - ) - use_cache = use_cache if use_cache is not None else self.config.use_cache - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - if encoder_outputs is None: - encoder_outputs = self.encoder( - input_ids=input_ids, - attention_mask=attention_mask, - head_mask=head_mask, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True - elif return_dict and not isinstance(encoder_outputs, BaseModelOutput): - encoder_outputs = BaseModelOutput( - 
last_hidden_state=encoder_outputs[0], - hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None, - attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None, - ) - - if not ( - isinstance(encoder_outputs, BaseModelOutput) or isinstance(encoder_outputs, tuple) - ): - raise ValueError( - "Inconsistent output: The output of the model encoder should be of type " - f"`BaseModelOutput` or tuple, but is of type `{type(encoder_outputs)}`." - ) - - # decoder outputs consists of (dec_features, past_key_value, dec_hidden, dec_attn) - decoder_outputs = self.decoder( - input_ids=decoder_input_ids, - attention_mask=decoder_attention_mask, - position_ids=decoder_position_ids, - encoder_hidden_states=encoder_outputs[0], - encoder_attention_mask=attention_mask, - head_mask=decoder_head_mask, - cross_attn_head_mask=cross_attn_head_mask, - past_key_values=past_key_values, - inputs_embeds=decoder_inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - - if not return_dict: - return decoder_outputs + encoder_outputs - - return Seq2SeqModelOutput( - last_hidden_state=decoder_outputs.last_hidden_state, - past_key_values=decoder_outputs.past_key_values, - decoder_hidden_states=decoder_outputs.hidden_states, - decoder_attentions=decoder_outputs.attentions, - cross_attentions=decoder_outputs.cross_attentions, - encoder_last_hidden_state=encoder_outputs.last_hidden_state, - encoder_hidden_states=encoder_outputs.hidden_states, - encoder_attentions=encoder_outputs.attentions, - ) diff --git a/src/pie_modules/models/common/__init__.py b/src/pie_modules/models/common/__init__.py deleted file mode 100644 index b021af996..000000000 --- a/src/pie_modules/models/common/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .has_taskmodule import HasTaskmodule -from .model_with_boilerplate import ModelWithBoilerplate -from .model_with_metrics_from_taskmodule import 
ModelWithMetricsFromTaskModule -from .stages import TESTING, TRAINING, VALIDATION diff --git a/src/pie_modules/models/common/has_taskmodule.py b/src/pie_modules/models/common/has_taskmodule.py deleted file mode 100644 index 03cd8154c..000000000 --- a/src/pie_modules/models/common/has_taskmodule.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Any, Dict, Optional - -from pie_core import AutoTaskModule, TaskModule - -from pie_modules.models.interface import RequiresTaskmoduleConfig - - -class HasTaskmodule(RequiresTaskmoduleConfig): - """A mixin class for models that have a taskmodule. - - Args: - taskmodule_config: The config for the taskmodule which can be obtained from the - taskmodule.config property. - """ - - def __init__(self, taskmodule_config: Optional[Dict[str, Any]] = None, **kwargs): - super().__init__(**kwargs) - self.taskmodule: Optional[TaskModule] = None - if taskmodule_config is not None: - self.taskmodule = AutoTaskModule.from_config(taskmodule_config) diff --git a/src/pie_modules/models/common/model_with_boilerplate.py b/src/pie_modules/models/common/model_with_boilerplate.py deleted file mode 100644 index a58d51e5a..000000000 --- a/src/pie_modules/models/common/model_with_boilerplate.py +++ /dev/null @@ -1,80 +0,0 @@ -import logging -from typing import Generic, Optional, Tuple, TypeVar - -from typing_extensions import TypeAlias - -from .model_with_metrics_from_taskmodule import ModelWithMetricsFromTaskModule -from .stages import TESTING, TRAINING, VALIDATION - -InputType = TypeVar("InputType") -OutputType = TypeVar("OutputType") -TargetType = TypeVar("TargetType") -StepInputType: TypeAlias = Tuple[InputType, TargetType] -StepOutputType = TypeVar("StepOutputType") - -logger = logging.getLogger(__name__) - - -class ModelWithBoilerplate( - ModelWithMetricsFromTaskModule[InputType, TargetType, OutputType], - Generic[InputType, OutputType, TargetType, StepOutputType], -): - """A PyTorchIEModel that adds boilerplate code for training, validation, 
and testing. - - Especially, it handles updating the metrics and logging of losses and metric results. Also see - ModelWithMetricsFromTaskModule for more details on how metrics are handled. - """ - - def get_loss_from_outputs(self, outputs: OutputType) -> StepOutputType: - if hasattr(outputs, "loss"): - return outputs.loss - else: - raise ValueError( - f"The model {self.__class__.__name__} does not define a 'loss' attribute in its output, " - "so the loss cannot be automatically extracted from the outputs. Please override the" - "get_loss_from_outputs() method for this model." - ) - - def log_loss(self, stage: str, loss: StepOutputType) -> None: - # show loss on each step only during training - self.log( - f"loss/{stage}", - loss, - on_step=(stage == TRAINING), - on_epoch=True, - prog_bar=True, - sync_dist=True, - ) - - def _step(self, stage: str, batch: StepInputType) -> StepOutputType: - inputs, targets = batch - outputs = self(inputs=inputs, targets=targets) - loss = self.get_loss_from_outputs(outputs=outputs) - self.log_loss(stage=stage, loss=loss) - self.update_metric(inputs=inputs, outputs=outputs, targets=targets, stage=stage) - - return loss - - def training_step(self, batch: StepInputType, batch_idx: int) -> StepOutputType: - return self._step(stage=TRAINING, batch=batch) - - def validation_step(self, batch: StepInputType, batch_idx: int) -> StepOutputType: - return self._step(stage=VALIDATION, batch=batch) - - def test_step(self, batch: StepInputType, batch_idx: int) -> StepOutputType: - return self._step(stage=TESTING, batch=batch) - - def predict_step( - self, batch: StepInputType, batch_idx: int, dataloader_idx: int = 0 - ) -> TargetType: - inputs, targets = batch - return self.predict(inputs=inputs) - - def on_train_epoch_end(self) -> None: - self.log_metric(stage=TRAINING) - - def on_validation_epoch_end(self) -> None: - self.log_metric(stage=VALIDATION) - - def on_test_epoch_end(self) -> None: - self.log_metric(stage=TESTING) diff --git 
a/src/pie_modules/models/common/model_with_metrics_from_taskmodule.py b/src/pie_modules/models/common/model_with_metrics_from_taskmodule.py deleted file mode 100644 index ea65ffd35..000000000 --- a/src/pie_modules/models/common/model_with_metrics_from_taskmodule.py +++ /dev/null @@ -1,152 +0,0 @@ -import logging -from typing import Dict, Generic, List, Optional, Set, TypeVar, Union - -from pie_core.utils.dictionary import flatten_dict_s -from pytorch_ie import PyTorchIEModel -from torchmetrics import Metric, MetricCollection - -from .has_taskmodule import HasTaskmodule -from .stages import TESTING, TRAINING, VALIDATION - -InputType = TypeVar("InputType") -TargetType = TypeVar("TargetType") -OutputType = TypeVar("OutputType") - -logger = logging.getLogger(__name__) - - -class ModelWithMetricsFromTaskModule( - HasTaskmodule, PyTorchIEModel, Generic[InputType, TargetType, OutputType] -): - """A PyTorchIEModel that adds metrics from a taskmodule. - - The metrics are added to the model as attributes with the names metric_{stage} via - setup_metrics method, where stage is one of "train", "val", or "test". The metrics are updated - with the update_metric method and logged with the log_metric method. - - Args: - metric_stages: The stages for which to set up metrics. Must be one of "train", "val", or - "test". - metric_intervals: A dict mapping metric stages to the number of steps between metric - calculation. If not provided, the metrics are calculated at the end of each epoch. - metric_call_predict: Whether to call predict() and use its result for metric calculation - instead of the (decoded) model output. This is useful, for instance, for generative models - that define special logic to produce predictions, e.g. beam search, which requires multiple - passes through the model. If True, predict() is called for all metric stages. If False (default), - the model outputs are passed to decode() and that is used for all metric stages. 
If a list of - metric stages is provided, predict() is called for these stages and the (decoded) model - outputs for the remaining stages. - """ - - def __init__( - self, - metric_stages: List[str] = [TRAINING, VALIDATION, TESTING], - metric_intervals: Optional[Dict[str, int]] = None, - metric_call_predict: Union[bool, List[str]] = False, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.setup_metrics(metric_stages=metric_stages) - - self.metric_intervals = metric_intervals or {} - missed_stages = set(self.metric_intervals) - set(metric_stages) - if len(missed_stages) > 0: - logger.warning( - f"There are stages in metric_intervals that are not in metric_stages: " - f"{missed_stages}. Available metric stages: {metric_stages}." - ) - - self.use_prediction_for_metrics: Set[str] - if isinstance(metric_call_predict, bool): - self.metric_call_predict = set(metric_stages) if metric_call_predict else set() - else: - self.metric_call_predict = set(metric_call_predict) - missed_stages = self.metric_call_predict - set(metric_stages) - if len(missed_stages) > 0: - logger.warning( - f"There are stages in metric_call_predict that are not in metric_stages: " - f"{missed_stages}. Available metric stages: {metric_stages}." - ) - - def setup_metrics(self, metric_stages: List[str]) -> None: - """Set up metrics for the given stages if a taskmodule is available. - - Args: - metric_stages: The stages for which to set up metrics. Must be one of "train", "val", or - "test". - """ - if self.taskmodule is not None: - for stage in metric_stages: - metric = self.taskmodule.configure_model_metric(stage=stage) - if metric is not None: - self._set_metric(stage=stage, metric=metric) - else: - logger.warning( - f"The taskmodule {self.taskmodule.__class__.__name__} does not define a metric for stage " - f"'{stage}'." - ) - elif len(metric_stages) > 0: - logger.warning( - "No taskmodule is available, so no metrics are set up. 
" - "Please provide a taskmodule_config to enable metrics for stages " - f"{metric_stages}." - ) - - def _get_metric( - self, stage: str, batch_idx: int = 0 - ) -> Optional[Union[Metric, MetricCollection]]: - metric_interval = self.metric_intervals.get(stage, 1) - if (batch_idx + 1) % metric_interval == 0: - return getattr(self, f"metric_{stage}", None) - else: - return None - - def _set_metric(self, stage: str, metric: Optional[Union[Metric, MetricCollection]]) -> None: - setattr(self, f"metric_{stage}", metric) - - def update_metric( - self, - stage: str, - inputs: InputType, - targets: TargetType, - outputs: OutputType, - ) -> None: - """Update the metric for the given stage. If outputs is provided, the predictions are - decoded from the outputs. Otherwise, the predictions are obtained by directly calling the - predict method with the inputs (note that this causes the model to be called a second - time). Finally, the metric is updated with the predictions and targets. - - Args: - stage: The stage for which to update the metric. Must be one of "train", "val", or "test". - inputs: The inputs to the model. - targets: The targets for the inputs. - outputs: The outputs of the model. They are decoded into predictions if provided. If - outputs is None, the predictions are obtained by directly calling the predict method - on the inputs. 
- """ - - metric = self._get_metric(stage=stage) - if metric is not None: - if stage in self.metric_call_predict: - predictions = self.predict(inputs=inputs) - else: - predictions = self.decode(inputs=inputs, outputs=outputs) - metric.update(predictions, targets) - - def log_metric(self, stage: str, reset: bool = True) -> None: - """Log the metric for the given stage and reset it.""" - - metric = self._get_metric(stage=stage) - if metric is not None: - values = metric.compute() - log_kwargs = {"on_step": False, "on_epoch": True, "sync_dist": True} - if isinstance(values, dict): - values_flat = flatten_dict_s(values, sep="/") - for key, value in values_flat.items(): - self.log(f"metric/{key}/{stage}", value, **log_kwargs) - else: - metric_name = getattr(metric, "name", None) or type(metric).__name__ - self.log(f"metric/{metric_name}/{stage}", values, **log_kwargs) - if reset: - metric.reset() diff --git a/src/pie_modules/models/common/stages.py b/src/pie_modules/models/common/stages.py deleted file mode 100644 index 7299e2092..000000000 --- a/src/pie_modules/models/common/stages.py +++ /dev/null @@ -1,3 +0,0 @@ -TRAINING = "train" -VALIDATION = "val" -TESTING = "test" diff --git a/src/pie_modules/models/components/__init__.py b/src/pie_modules/models/components/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/pie_modules/models/components/pointer_head.py b/src/pie_modules/models/components/pointer_head.py deleted file mode 100644 index e8a02acfa..000000000 --- a/src/pie_modules/models/components/pointer_head.py +++ /dev/null @@ -1,357 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import torch -import torch.nn.functional as F -import torch.utils.checkpoint -from torch import nn -from torch.nn import CrossEntropyLoss -from transformers.utils import logging - -logger = logging.get_logger(__name__) - - -class PointerHead(torch.nn.Module): - # Copy and generate, - def __init__( - self, - # (decoder) input space - 
target_token_ids: List[int], - # output space (targets) - bos_id: int, - eos_id: int, - pad_id: int, - # embeddings - embeddings: nn.Embedding, - embedding_weight_mapping: Optional[Dict[Union[int, str], List[int]]] = None, - # other parameters - use_encoder_mlp: bool = False, - use_constraints_encoder_mlp: bool = False, - decoder_position_id_mode: Optional[nn.Module] = None, - decoder_position_id_pattern: Optional[List[int]] = None, - decoder_position_id_mapping: Optional[Dict[str, int]] = None, - ): - super().__init__() - - self.embeddings = embeddings - - self.pointer_offset = len(target_token_ids) - - # check that bos, eos, and pad are not out of bounds - for target_id, target_id_name in zip( - [bos_id, eos_id, pad_id], ["bos_id", "eos_id", "pad_id"] - ): - if target_id >= len(target_token_ids): - raise ValueError( - f"{target_id_name} [{target_id}] must be smaller than the number of target token ids " - f"[{len(target_token_ids)}]!" - ) - - self.bos_id = bos_id - self.eos_id = eos_id - self.pad_id = pad_id - # all ids that are not bos, eos or pad are label ids - self.label_ids = [ - target_id - for target_id in range(len(target_token_ids)) - if target_id not in [self.bos_id, self.eos_id, self.pad_id] - ] - - target2token_id = torch.LongTensor(target_token_ids) - self.register_buffer("target2token_id", target2token_id) - self.label_token_ids = self.target2token_id[self.label_ids] - self.eos_token_id = target_token_ids[self.eos_id] - self.pad_token_id = target_token_ids[self.pad_id] - - hidden_size = self.embeddings.embedding_dim - if use_encoder_mlp: - self.encoder_mlp = nn.Sequential( - nn.Linear(hidden_size, hidden_size), - nn.Dropout(0.3), - nn.ReLU(), - nn.Linear(hidden_size, hidden_size), - ) - if use_constraints_encoder_mlp: - self.constraints_encoder_mlp = nn.Sequential( - nn.Linear(hidden_size, hidden_size), - nn.Dropout(0.3), - nn.ReLU(), - nn.Linear(hidden_size, hidden_size), - ) - - self.embedding_weight_mapping = None - if embedding_weight_mapping is 
not None: - # Because of config serialization, the keys may be strings. Convert them back to ints. - self.embedding_weight_mapping = { - int(k): v for k, v in embedding_weight_mapping.items() - } - - self.decoder_position_id_mode = decoder_position_id_mode - self.decoder_position_id_mapping = decoder_position_id_mapping - if self.decoder_position_id_mode is None: - pass - elif self.decoder_position_id_mode in ["pattern", "pattern_with_increment"]: - if decoder_position_id_pattern is None: - raise ValueError( - "decoder_position_id_pattern must be provided when using " - 'decoder_position_id_mode="pattern" or "pattern_with_increment"!' - ) - self.register_buffer( - "decoder_position_id_pattern", torch.tensor(decoder_position_id_pattern) - ) - elif self.decoder_position_id_mode == "mapping": - if self.decoder_position_id_mapping is None: - raise ValueError( - 'decoder_position_id_mode="mapping" requires decoder_position_id_mapping to be provided!' - ) - else: - raise ValueError( - f'decoder_position_id_mode="{self.decoder_position_id_mode}" is not supported, ' - 'use one of "pattern", "pattern_with_increment", or "mapping"!' - ) - - @property - def use_prepared_position_ids(self): - return self.decoder_position_id_mode is not None - - def set_embeddings(self, embedding: nn.Embedding) -> None: - self.embeddings = embedding - - def overwrite_embeddings_with_mapping(self) -> None: - """Overwrite individual embeddings with embeddings for other tokens. - - This is useful, for instance, if the label vocabulary is a subset of the source vocabulary. - In this case, this method can be used to initialize each label embedding with one or - multiple (averaged) source embeddings. 
- """ - if self.embedding_weight_mapping is not None: - for special_token_index, source_indices in self.embedding_weight_mapping.items(): - self.embeddings.weight.data[special_token_index] = self.embeddings.weight.data[ - source_indices - ].mean(dim=0) - - def prepare_decoder_input_ids( - self, - input_ids: torch.LongTensor, - encoder_input_ids: torch.LongTensor, - ) -> torch.LongTensor: - mapping_token_mask = input_ids.lt(self.pointer_offset) - mapped_tokens = input_ids.masked_fill(input_ids.ge(self.pointer_offset), 0) - tag_mapped_tokens = self.target2token_id[mapped_tokens] - - encoder_input_ids_index = input_ids - self.pointer_offset - encoder_input_ids_index = encoder_input_ids_index.masked_fill( - encoder_input_ids_index.lt(0), 0 - ) - encoder_input_length = encoder_input_ids.size(1) - if encoder_input_ids_index.max() >= encoder_input_length: - raise ValueError( - f"encoder_input_ids_index.max() [{encoder_input_ids_index.max()}] must be smaller " - f"than encoder_input_length [{encoder_input_length}]!" - ) - - word_mapped_tokens = encoder_input_ids.gather(index=encoder_input_ids_index, dim=1) - - decoder_input_ids = torch.where( - mapping_token_mask, tag_mapped_tokens, word_mapped_tokens - ).to(torch.long) - - # Note: we do not need to explicitly handle the padding (via a decoder attention mask) because - # it gets automatically mapped to the pad token id - - return decoder_input_ids - - def prepare_decoder_position_ids(self, input_ids: torch.LongTensor) -> torch.LongTensor: - if self.decoder_position_id_mode in ["pattern", "pattern_with_increment"]: - bsz, tokens_len = input_ids.size() - pattern_len = len(self.decoder_position_id_pattern) - # the number of full and partly records. 
note that tokens_len includes the bos token - repeat_num = (tokens_len - 2) // pattern_len + 1 - position_ids = self.decoder_position_id_pattern.repeat(bsz, repeat_num) - - if self.decoder_position_id_mode == "pattern_with_increment": - position_ids_reshaped = position_ids.view(bsz, -1, pattern_len) - add_shift_pos = ( - torch.arange(0, repeat_num, device=position_ids_reshaped.device) - .repeat(bsz) - .view(bsz, -1) - .unsqueeze(-1) - ) - # multiply by the highest position id in the pattern so that the position ids are unique - # for any decoder_position_id_pattern across all records - add_shift_pos *= max(self.decoder_position_id_pattern) + 1 - position_ids_reshaped = add_shift_pos + position_ids_reshaped - position_ids = position_ids_reshaped.view(bsz, -1).long() - # use start_position_id=0 - start_pos = torch.zeros(bsz, 1, dtype=position_ids.dtype, device=position_ids.device) - # shift by 2 to account for start_position_id=0 and pad_position_id=1 - all_position_ids = torch.cat([start_pos, position_ids + 2], dim=-1) - all_position_ids_truncated = all_position_ids[:bsz, :tokens_len] - - # mask the padding tokens - mask_invalid = input_ids.eq(self.pad_id) - all_position_ids_truncated_masked = all_position_ids_truncated.masked_fill( - mask_invalid, 1 - ) - - return all_position_ids_truncated_masked - elif self.decoder_position_id_mode == "mapping": - # we ignor the typing issue here because we ensure that the mapping is not None in the __init__ - mapping: Dict[str, int] = self.decoder_position_id_mapping # type: ignore - if "default" not in mapping: - raise ValueError( - f"mapping must contain a default entry, but only contains {list(mapping)}!" 
- ) - position_ids = input_ids.new_full(input_ids.size(), fill_value=mapping["default"]) - # ensure that values for all vocab entries are set first - if "vocab" in mapping: - position_ids[input_ids.lt(self.pointer_offset)] = mapping["vocab"] - already_set: Dict[int, Tuple[str, int]] = {} - for key, value in mapping.items(): - if key in ["default", "vocab"]: - continue - elif key == "bos": - input_id = self.bos_id - elif key == "eos": - input_id = self.eos_id - elif key == "pad": - input_id = self.pad_id - else: - raise ValueError(f"Mapping contains unknown key '{key}' (mapping: {mapping}).") - if already_set.get(input_id, (key, value))[1] != value: - previous_key, previous_value = already_set[input_id] - raise ValueError( - f"Can not set the position ids for '{key}' to {value} because it was already " - f"set to {previous_value} by key '{previous_key}'. Note that both, '{key}' and " - f"'{previous_key}', have the same id ({input_id}), so their position_ids need to " - f"be also the same (position id mapping: {mapping})." - ) - position_ids[input_ids.eq(input_id)] = value - already_set[input_id] = key, value - return position_ids - else: - raise ValueError( - f"decoder_position_id_mode={self.decoder_position_id_mode} not supported!" 
- ) - - def prepare_decoder_inputs( - self, - input_ids: torch.LongTensor, - encoder_input_ids: torch.LongTensor, - position_ids: Optional[torch.LongTensor] = None, - ) -> Dict[str, torch.Tensor]: - inputs = {} - if self.use_prepared_position_ids: - if position_ids is None: - position_ids = self.prepare_decoder_position_ids(input_ids=input_ids) - inputs["position_ids"] = position_ids - - inputs["input_ids"] = self.prepare_decoder_input_ids( - input_ids=input_ids, - encoder_input_ids=encoder_input_ids, - ) - return inputs - - def forward( - self, - last_hidden_state, - encoder_input_ids, - encoder_last_hidden_state, - encoder_attention_mask, - labels: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - constraints: Optional[torch.LongTensor] = None, - ): - # assemble the logits - logits = last_hidden_state.new_full( - ( - last_hidden_state.size(0), - last_hidden_state.size(1), - self.pointer_offset + encoder_input_ids.size(-1), - ), - fill_value=-1e24, - ) - - # eos and label scores depend only on the decoder output - # bsz x max_len x 1 - eos_scores = F.linear(last_hidden_state, self.embeddings.weight[[self.eos_token_id]]) - label_embeddings = self.embeddings.weight[self.label_token_ids] - # bsz x max_len x num_class - label_scores = F.linear(last_hidden_state, label_embeddings) - - # the pointer depends on the src token embeddings, the encoder output and the decoder output - # bsz x max_bpe_len x hidden_size - src_outputs = encoder_last_hidden_state - if getattr(self, "encoder_mlp", None) is not None: - src_outputs = self.encoder_mlp(src_outputs) - - # bsz x max_word_len x hidden_size - input_embed = self.embeddings(encoder_input_ids) - - # bsz x max_len x max_word_len - word_scores = torch.einsum("blh,bnh->bln", last_hidden_state, src_outputs) - gen_scores = torch.einsum("blh,bnh->bln", last_hidden_state, input_embed) - avg_word_scores = (gen_scores + word_scores) / 2 - - # never point to the padding or the eos token 
in the encoder input - # TODO: why not excluding the bos token? seems to give worse results, but not tested extensively - mask_invalid = encoder_attention_mask.eq(0) | encoder_input_ids.eq(self.eos_token_id) - avg_word_scores = avg_word_scores.masked_fill(mask_invalid.unsqueeze(1), -1e32) - - # Note: the remaining row in logits contains the score for the bos token which should be never generated! - logits[:, :, [self.eos_id]] = eos_scores - logits[:, :, self.label_ids] = label_scores - logits[:, :, self.pointer_offset :] = avg_word_scores - - loss = None - # compute the loss if labels are provided - if labels is not None: - loss_fct = CrossEntropyLoss() - logits_resized = logits.reshape(-1, logits.size(-1)) - labels_resized = labels.reshape(-1) - if decoder_attention_mask is None: - raise ValueError("decoder_attention_mask must be provided to compute the loss!") - mask_resized = decoder_attention_mask.reshape(-1) - labels_masked = labels_resized.masked_fill( - ~mask_resized.to(torch.bool), loss_fct.ignore_index - ) - loss = loss_fct(logits_resized, labels_masked) - - # compute the constraints loss if constraints are provided - if constraints is not None: - if getattr(self, "constraints_encoder_mlp", None) is not None: - # TODO: is it fine to apply constraints_encoder_mlp to both src_outputs and label_embeddings? - # This is what the original code seems to do, but this is different from the usage of encoder_mlp. 
- constraints_src_outputs = self.constraints_encoder_mlp(src_outputs) - constraints_label_embeddings = self.constraints_encoder_mlp(label_embeddings) - else: - constraints_src_outputs = src_outputs - constraints_label_embeddings = label_embeddings - constraints_label_scores = F.linear(last_hidden_state, constraints_label_embeddings) - # bsz x max_len x max_word_len - constraints_word_scores = torch.einsum( - "blh,bnh->bln", last_hidden_state, constraints_src_outputs - ) - constraints_logits = last_hidden_state.new_full( - ( - last_hidden_state.size(0), - last_hidden_state.size(1), - self.pointer_offset + encoder_input_ids.size(-1), - ), - fill_value=-1e24, - ) - constraints_logits[:, :, self.label_ids] = constraints_label_scores - constraints_logits[:, :, self.pointer_offset :] = constraints_word_scores - - mask = constraints >= 0 - constraints_logits_valid = constraints_logits[mask] - constraints_valid = constraints[mask] - loss_c = F.binary_cross_entropy( - torch.sigmoid(constraints_logits_valid), constraints_valid.float() - ) - - if loss is None: - loss = loss_c - else: - loss += loss_c - - return logits, loss diff --git a/src/pie_modules/models/components/pooler.py b/src/pie_modules/models/components/pooler.py index b835e2f9c..e69de29bb 100644 --- a/src/pie_modules/models/components/pooler.py +++ b/src/pie_modules/models/components/pooler.py @@ -1,274 +0,0 @@ -import logging -from typing import Any, Callable, Dict, Tuple, Union - -import torch -from torch import Tensor, cat, nn - -# possible pooler types -CLS_TOKEN = "cls_token" # CLS token -START_TOKENS = "start_tokens" # MTB start tokens concat -MENTION_POOLING = "mention_pooling" # mention token pooling and concat - - -logger = logging.getLogger(__name__) - - -def pool_cls(hidden_state: Tensor, **kwargs) -> Tensor: - return hidden_state[:, 0, :] - - -class AtIndexPooler(nn.Module): - """Pooler that takes the hidden states at given indices. If the index is negative, a learned - embedding is used. 
- - The indices are expected to have the shape [batch_size, num_indices]. The resulting embeddings are concatenated, - so the output shape is [batch_size, num_indices * input_dim]. - - Args: - input_dim: The input dimension of the hidden state. - num_indices: The number of indices to pool. - offset: An offset to add to the indices. This can be useful if the input is prepared with special - tokens at the beginning / at the end of indexed sequences, and we want to use the hidden state of this - token instead of the first / last token of the sequence. - - Returns: - The pooled hidden states with shape [batch_size, num_indices * input_dim]. - """ - - def __init__(self, input_dim: int, num_indices: int = 2, offset: int = 0, **kwargs): - super().__init__(**kwargs) - self.input_dim = input_dim - self.num_indices = num_indices - self.offset = offset - self.missing_embeddings = nn.Parameter(torch.empty(num_indices, self.input_dim)) - nn.init.normal_(self.missing_embeddings) - - def forward(self, hidden_state: Tensor, indices: Tensor, **kwargs) -> Tensor: - batch_size, seq_len, hidden_size = hidden_state.shape - if indices.shape[1] != self.num_indices: - raise ValueError( - f"number of indices [{indices.shape[1]}] has to be the same as num_types [{self.num_indices}]" - ) - - # respect the offset - indices = indices + self.offset - - # times num_types due to concat - result = torch.zeros( - batch_size, hidden_size * self.num_indices, device=hidden_state.device - ) - for batch_idx, current_indices in enumerate(indices): - current_embeddings = [ - ( - hidden_state[batch_idx, current_indices[i], :] - if current_indices[i] >= 0 - else self.missing_embeddings[i] - ) - for i in range(self.num_indices) - ] - result[batch_idx] = cat(current_embeddings, 0) - return result - - @property - def output_dim(self) -> int: - return self.input_dim * self.num_indices - - -class ArgumentWrappedPooler(nn.Module): - """Wraps a pooler and maps the arguments to the pooler. 
- - Args: - pooler: The pooler to wrap. - argument_mapping: A mapping from the arguments of the forward method to the arguments of the pooler. - """ - - def __init__( - self, pooler: Union[nn.Module, Callable], argument_mapping: Dict[str, str], **kwargs - ): - super().__init__(**kwargs) - self.pooler = pooler - self.argument_mapping = argument_mapping - - def forward(self, hidden_state: Tensor, **kwargs) -> Tensor: - pooler_kwargs = {} - for k, v in kwargs.items(): - if k in self.argument_mapping: - pooler_kwargs[self.argument_mapping[k]] = v - return self.pooler(hidden_state, **pooler_kwargs) - - -class SpanMaxPooler(nn.Module): - """Pooler that takes the max hidden state over spans. If the start or end index is negative, a - learned. - - embedding is used. The indices are expected to have the shape [batch_size, num_indices]. The resulting embeddings - are concatenated, so the output shape is [batch_size, num_indices * input_dim]. - - Args: - input_dim: The input dimension of the hidden state. - num_indices: The number of indices to pool. - - Returns: - The pooled hidden states with shape [batch_size, num_indices * input_dim]. 
- """ - - def __init__(self, input_dim: int, num_indices: int = 2, **kwargs): - super().__init__(**kwargs) - self.input_dim = input_dim - self.num_indices = num_indices - self.missing_embeddings = nn.Parameter(torch.empty(num_indices, self.input_dim)) - nn.init.normal_(self.missing_embeddings) - - def forward( - self, hidden_state: Tensor, start_indices: Tensor, end_indices: Tensor, **kwargs - ) -> Tensor: - batch_size, seq_len, hidden_size = hidden_state.shape - if start_indices.shape[1] != self.num_indices: - raise ValueError( - f"number of start indices [{start_indices.shape[1]}] has to be the same as num_types [{self.num_indices}]" - ) - - if end_indices.shape[1] != self.num_indices: - raise ValueError( - f"number of end indices [{end_indices.shape[1]}] has to be the same as num_types [{self.num_indices}]" - ) - - # check that start_indices are before end_indices - mask_both_positive = (start_indices >= 0) & (end_indices >= 0) - mask_start_before_end = start_indices < end_indices - mask_valid = mask_start_before_end | ~mask_both_positive - if not torch.all(mask_valid): - raise ValueError( - f"values in start_indices have to be smaller than respective values in " - f"end_indices, but start_indices=\n{start_indices}\n and end_indices=\n{end_indices}" - ) - - # times num_indices due to concat - result = torch.zeros( - batch_size, hidden_size * self.num_indices, device=hidden_state.device - ) - for batch_idx in range(batch_size): - current_start_indices = start_indices[batch_idx] - current_end_indices = end_indices[batch_idx] - current_embeddings = [ - ( - torch.amax( - hidden_state[ - batch_idx, current_start_indices[i] : current_end_indices[i], : - ], - 0, - ) - if current_start_indices[i] >= 0 and current_end_indices[i] >= 0 - else self.missing_embeddings[i] - ) - for i in range(self.num_indices) - ] - result[batch_idx] = cat(current_embeddings, 0) - - return result - - @property - def output_dim(self) -> int: - return self.input_dim * self.num_indices - - 
class SpanMeanPooler(nn.Module):
    """Pool spans by averaging their token states.

    For every example, ``num_indices`` spans are pooled and the pooled vectors are
    concatenated, so the output has shape ``[batch_size, num_indices * input_dim]``.
    If the start or end index of a span is negative, the span is treated as missing
    and a learned embedding (one per span position) is used instead. The indices are
    expected to have the shape ``[batch_size, num_indices]``.

    Args:
        input_dim: The input dimension of the hidden state.
        num_indices: The number of indices (spans) to pool per example.
        **kwargs: Passed on to ``nn.Module.__init__``.
    """

    def __init__(self, input_dim: int, num_indices: int = 2, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.num_indices = num_indices
        # One learned replacement vector per span position, used for missing spans.
        self.missing_embeddings = nn.Parameter(torch.empty(num_indices, self.input_dim))
        nn.init.normal_(self.missing_embeddings)

    def forward(
        self, hidden_state: Tensor, start_indices: Tensor, end_indices: Tensor, **kwargs
    ) -> Tensor:
        """Return the concatenated mean-pooled span embeddings.

        Args:
            hidden_state: Tensor that unpacks into (batch_size, seq_len, hidden_size).
            start_indices: Inclusive span starts, shape [batch_size, num_indices].
            end_indices: Exclusive span ends, shape [batch_size, num_indices].
            **kwargs: Ignored.

        Returns:
            Tensor of shape [batch_size, num_indices * hidden_size] with the dtype and
            device of ``hidden_state``.

        Raises:
            ValueError: If the number of indices does not match ``num_indices`` or if
                a non-missing span has ``start >= end``.
        """
        batch_size, _, hidden_size = hidden_state.shape
        for kind, indices in (("start", start_indices), ("end", end_indices)):
            if indices.shape[1] != self.num_indices:
                raise ValueError(
                    f"number of {kind} indices [{indices.shape[1]}] has to be the same as "
                    f"num_indices [{self.num_indices}]"
                )

        # Only spans with two non-negative indices need to satisfy start < end;
        # spans with a negative index are "missing" and handled separately below.
        mask_both_non_negative = (start_indices >= 0) & (end_indices >= 0)
        mask_valid = (start_indices < end_indices) | ~mask_both_non_negative
        if not torch.all(mask_valid):
            raise ValueError(
                f"values in start_indices have to be smaller than respective values in "
                f"end_indices, but start_indices=\n{start_indices}\n and end_indices=\n{end_indices}"
            )

        # times num_indices due to concat; follow the input dtype so that e.g. float64
        # or half-precision hidden states are not silently converted to float32
        result = torch.zeros(
            batch_size,
            hidden_size * self.num_indices,
            dtype=hidden_state.dtype,
            device=hidden_state.device,
        )
        for batch_idx in range(batch_size):
            span_embeddings = []
            for i in range(self.num_indices):
                start = start_indices[batch_idx, i]
                end = end_indices[batch_idx, i]
                if start >= 0 and end >= 0:
                    span_embeddings.append(
                        torch.mean(hidden_state[batch_idx, start:end, :], dim=0)
                    )
                else:
                    span_embeddings.append(self.missing_embeddings[i])
            result[batch_idx] = cat(span_embeddings, 0)

        return result

    @property
    def output_dim(self) -> int:
        """Size of the last dimension of the pooled output."""
        return self.input_dim * self.num_indices
logger = logging.getLogger(__name__)

# Maps the "type" entry of an encoder config to the torch class that implements it.
RNN_TYPE2CLASS = {"lstm": nn.LSTM, "gru": nn.GRU, "rnn": nn.RNN}
ACTIVATION_TYPE2CLASS = {
    "relu": nn.ReLU,
    "tanh": nn.Tanh,
    "sigmoid": nn.Sigmoid,
    "gelu": nn.GELU,
}


class RNNWrapper(nn.Module):
    """Wrap a recurrent module so that calling it returns only the output sequence.

    The wrapped module returns a tuple; only its first element is passed on, which
    lets the wrapper be chained inside ``nn.Sequential``.
    """

    def __init__(self, rnn: nn.Module):
        super().__init__()
        self.rnn = rnn

    def forward(self, *args, **kwargs) -> Tensor:
        return self.rnn(*args, **kwargs)[0]

    @property
    def output_size(self) -> int:
        """Feature size of the output sequence produced by the wrapped module."""
        # An LSTM with proj_size > 0 emits proj_size features per direction
        # instead of hidden_size (see the torch.nn.LSTM documentation).
        per_direction = getattr(self.rnn, "proj_size", 0) or self.rnn.hidden_size
        return per_direction * (2 if self.rnn.bidirectional else 1)


def build_seq2seq_encoder(
    config: Dict[str, Any], input_size: int
) -> Tuple[Optional[nn.Module], int]:
    """Build a sequence-to-sequence encoder from a config dictionary.

    The ``"type"`` entry selects what is built; all remaining entries are passed
    as keyword arguments to the respective torch class:

    * ``"sequential"``: every remaining value is itself an encoder config; the
      resulting modules are chained in ``nn.Sequential``,
    * a key of ``RNN_TYPE2CLASS``: a batch-first recurrent layer in an ``RNNWrapper``,
    * ``"linear"``: ``nn.Linear`` with ``in_features=input_size``,
    * a key of ``ACTIVATION_TYPE2CLASS``: the respective activation module,
    * ``"dropout"``: ``nn.Dropout``,
    * ``"none"`` or a missing type: no module.

    Args:
        config: The encoder configuration. It is not modified.
        input_size: Feature size of the sequence fed into the encoder.

    Returns:
        The encoder (or None if nothing was built) and its output feature size.

    Raises:
        ValueError: If the type is not one of the above.
    """
    # copy the config to avoid side effects
    config = copy(config)
    seq2seq_encoder_type = config.pop("type", None)
    if seq2seq_encoder_type is None:
        logger.warning(
            "seq2seq_encoder_type is not specified in the seq2seq_encoder: %s. "
            "Do not build this seq2seq_encoder.",
            config,
        )
        return None, input_size

    if seq2seq_encoder_type == "sequential":
        modules: List[nn.Module] = []
        output_size = input_size
        for subconfig in config.values():
            module, output_size = build_seq2seq_encoder(subconfig, input_size)
            if module is not None:
                modules.append(module)
                # the next sub-encoder consumes the output of this one
                input_size = output_size

        seq2seq_encoder = nn.Sequential(*modules)
    elif seq2seq_encoder_type in RNN_TYPE2CLASS:
        rnn_class = RNN_TYPE2CLASS[seq2seq_encoder_type]
        seq2seq_encoder = RNNWrapper(rnn_class(input_size=input_size, batch_first=True, **config))
        output_size = seq2seq_encoder.output_size
    elif seq2seq_encoder_type == "linear":
        seq2seq_encoder = nn.Linear(in_features=input_size, **config)
        output_size = seq2seq_encoder.out_features
    elif seq2seq_encoder_type in ACTIVATION_TYPE2CLASS:
        activation_class = ACTIVATION_TYPE2CLASS[seq2seq_encoder_type]
        seq2seq_encoder = activation_class(**config)
        output_size = input_size
    elif seq2seq_encoder_type == "dropout":
        seq2seq_encoder = nn.Dropout(**config)
        output_size = input_size
    elif seq2seq_encoder_type == "none":
        seq2seq_encoder = None
        output_size = input_size
    else:
        raise ValueError(f"Unknown seq2seq_encoder_type: {seq2seq_encoder_type}")

    return seq2seq_encoder, output_size
T = TypeVar("T")


def separate_arguments_by_prefix(
    arguments: MutableMapping[str, T], prefixes: List[str]
) -> Dict[str, Dict[str, T]]:
    """Group arguments by the first matching name prefix.

    Each argument is assigned to the first entry of ``prefixes`` its name starts
    with, and is stored under its name with that prefix removed. Arguments that
    match no prefix are collected unchanged under the key ``"remaining"``.

    Because the first match wins, a prefix that is itself the beginning of another
    prefix has to come after the longer one (e.g. ``["pooler_pair_", "pooler_"]``).

    Args:
        arguments: The arguments to separate, keyed by name.
        prefixes: The prefixes to look for, in order of priority. Any iterable of
            strings is accepted, not only a list.

    Returns:
        A dictionary with one entry per prefix plus ``"remaining"``, each mapping
        the (stripped) argument names to their values.
    """
    # Materialize once: the prefixes are scanned for every argument, and callers
    # may pass a tuple or another non-list iterable.
    prefixes = tuple(prefixes)
    result: Dict[str, Dict[str, T]] = {prefix: {} for prefix in prefixes}
    result.setdefault("remaining", {})
    for name, value in arguments.items():
        matched = next((prefix for prefix in prefixes if name.startswith(prefix)), None)
        if matched is None:
            result["remaining"][name] = value
        else:
            result[matched][name[len(matched) :]] = value
    return result
RequiresModelNameOrPath, -): - """Abstract base model for sequence classification with a pooler. - - Args: - model_name_or_path: The name or path of the HuggingFace model to use. - tokenizer_vocab_size: The size of the tokenizer vocabulary. If provided, the model's - tokenizer embeddings are resized to this size. - classifier_dropout: The dropout probability for the classifier. If not provided, the - dropout probability is taken from the Huggingface model config. - learning_rate: The learning rate for the optimizer. - task_learning_rate: The learning rate for the task-specific parameters. If None, the - learning rate for all parameters is set to `learning_rate`. - warmup_proportion: The proportion of steps to warm up the learning rate. - pooler: The pooler configuration. If None, CLS token pooling is used. - freeze_base_model: If True, the base model parameters are frozen. - base_model_prefix: The prefix of the base model parameters when using a task_learning_rate - or freeze_base_model. If None, the base_model_prefix of the model is used. - **kwargs: Additional keyword arguments passed to the parent class, - see :class:`ModelWithBoilerplate`. 
- """ - - def __init__( - self, - model_name_or_path: str, - tokenizer_vocab_size: Optional[int] = None, - classifier_dropout: Optional[float] = None, - learning_rate: float = 1e-5, - task_learning_rate: Optional[float] = None, - warmup_proportion: float = 0.1, - pooler: Optional[Union[Dict[str, Any], str]] = None, - freeze_base_model: bool = False, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.save_hyperparameters() - - self.learning_rate = learning_rate - self.task_learning_rate = task_learning_rate - self.warmup_proportion = warmup_proportion - self.freeze_base_model = freeze_base_model - self.model_name_or_path = model_name_or_path - - self.model = self.setup_base_model() - - if tokenizer_vocab_size is not None: - self.model.resize_token_embeddings(tokenizer_vocab_size) - - if self.freeze_base_model: - for param in self.model.parameters(): - param.requires_grad = False - - if classifier_dropout is None: - # Get the classifier dropout value from the Huggingface model config. - # This is a bit of a mess since some Configs use different variable names or change the semantics - # of the dropout (e.g. DistilBert has one dropout prob for QA and one for Seq classification, and a - # general one for embeddings, encoder and pooler). 
- classifier_dropout_attr = HF_MODEL_TYPE_TO_CLASSIFIER_DROPOUT_ATTRIBUTE.get( - self.model.config.model_type, "classifier_dropout" - ) - classifier_dropout = getattr(self.model.config, classifier_dropout_attr) or 0.0 - self.dropout = nn.Dropout(classifier_dropout) - - if isinstance(pooler, str): - pooler = {"type": pooler} - self.pooler_config = pooler or {} - self.pooler, pooler_output_dim = self.setup_pooler(input_dim=self.model.config.hidden_size) - self.classifier = self.setup_classifier(pooler_output_dim=pooler_output_dim) - self.loss_fct = self.setup_loss_fct() - - def setup_base_model(self) -> PreTrainedModel: - config = AutoConfig.from_pretrained(self.model_name_or_path) - if self.is_from_pretrained: - return AutoModel.from_config(config=config) - else: - return AutoModel.from_pretrained(self.model_name_or_path, config=config) - - @abstractmethod - def setup_classifier(self, pooler_output_dim: int) -> Callable: - pass - - @abstractmethod - def setup_loss_fct(self) -> Callable: - pass - - def setup_pooler(self, input_dim: int) -> Tuple[Callable, int]: - """Set up the pooler. The pooler is used to get a representation of the input sequence(s) - that can be used by the classifier. It is a callable that takes the hidden states of the - base model (and additional model inputs that are prefixed with "pooler_") and returns the - pooled output. - - Args: - input_dim: The input dimension of the pooler, i.e. the hidden size of the base model. - - Returns: - A tuple with the pooler and the output dimension of the pooler. 
- """ - return get_pooler_and_output_size(config=self.pooler_config, input_dim=input_dim) - - def get_pooled_output(self, model_inputs, pooler_inputs) -> torch.FloatTensor: - output = self.model(**model_inputs) - hidden_state = output.last_hidden_state - pooled_output = self.pooler(hidden_state, **pooler_inputs) - pooled_output = self.dropout(pooled_output) - return pooled_output - - def forward( - self, - inputs: InputType, - targets: Optional[TargetType] = None, - return_hidden_states: bool = False, - ) -> OutputType: - sanitized_inputs = separate_arguments_by_prefix(arguments=inputs, prefixes=["pooler_"]) - - pooled_output = self.get_pooled_output( - model_inputs=sanitized_inputs["remaining"], pooler_inputs=sanitized_inputs["pooler_"] - ) - - logits = self.classifier(pooled_output) - - result = {"logits": logits} - if targets is not None: - labels = targets["labels"] - loss = self.loss_fct(logits, labels) - result["loss"] = loss - if return_hidden_states: - raise NotImplementedError("return_hidden_states is not yet implemented") - - return SequenceClassifierOutput(**result) - - @abstractmethod - def decode(self, inputs: InputType, outputs: OutputType) -> TargetType: - pass - - def base_model_named_parameters(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - if prefix: - prefix = f"{prefix}." - return self.model.named_parameters(prefix=f"{prefix}model") - - def task_named_parameters(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - if prefix: - prefix = f"{prefix}." 
@PyTorchIEModel.register()
class SequenceClassificationModelWithPooler(
    SequenceClassificationModelWithPoolerBase,
    RequiresNumClasses,
):
    """Sequence classifier: pooled representation followed by a linear layer.

    The input sequence is reduced to a single representation by a pooler (set up
    via the `pooler` argument, see :func:`get_pooler_and_output_size`), which is
    then fed into a linear classification layer.

    Args:
        num_classes: The number of classes for the classification task.
        multi_label: If True, the model is trained as a multi-label classifier.
        multi_label_threshold: The threshold for the multi-label classifier, i.e. the
            probability above which a class is predicted.
        **kwargs: Passed on to the parent class.
    """

    def __init__(
        self,
        num_classes: int,
        multi_label: bool = False,
        multi_label_threshold: float = 0.5,
        **kwargs,
    ):
        # The parent constructor calls setup_classifier() and setup_loss_fct(),
        # which read these two attributes, so they have to exist beforehand.
        self.num_classes = num_classes
        self.multi_label = multi_label
        super().__init__(**kwargs)

        self.multi_label_threshold = multi_label_threshold

    def setup_classifier(self, pooler_output_dim: int) -> Callable:
        """Create the linear layer mapping the pooled output to class logits."""
        return nn.Linear(pooler_output_dim, self.num_classes)

    def setup_loss_fct(self) -> Callable:
        """Create the loss: BCE-with-logits if multi-label, cross entropy otherwise."""
        if self.multi_label:
            return nn.BCEWithLogitsLoss()
        return nn.CrossEntropyLoss()

    def decode(self, inputs: InputType, outputs: OutputType) -> TargetType:
        """Turn the logits into predicted labels and probabilities."""
        logits = outputs.logits
        if self.multi_label:
            # independent per-class decisions
            probabilities = torch.sigmoid(logits)
            labels = (probabilities > self.multi_label_threshold).to(torch.long)
        else:
            # exactly one class per example
            probabilities = torch.softmax(logits, dim=-1)
            labels = torch.argmax(logits, dim=-1).to(torch.long)
        return {"labels": labels, "probabilities": probabilities}
- **kwargs - """ - - def __init__( - self, - pooler: Optional[Union[Dict[str, Any], str]] = None, - **kwargs, - ): - if pooler is None: - # use (max) mention pooling per default - pooler = {"type": "mention_pooling", "num_indices": 1} - super().__init__(pooler=pooler, **kwargs) - - def setup_classifier( - self, pooler_output_dim: int - ) -> Callable[[torch.FloatTensor, torch.FloatTensor], torch.FloatTensor]: - return torch.nn.functional.cosine_similarity - - def setup_loss_fct(self) -> Callable: - return nn.BCELoss() - - def forward( - self, - inputs: InputType, - targets: Optional[TargetType] = None, - return_hidden_states: bool = False, - ) -> OutputType: - sanitized_inputs = separate_arguments_by_prefix( - # Note that the order of the prefixes is important because one is a prefix of the other, - # so we need to start with the longer! - arguments=inputs, - prefixes=["pooler_pair_", "pooler_"], - ) - - pooled_output = self.get_pooled_output( - model_inputs=sanitized_inputs["remaining"]["encoding"], - pooler_inputs=sanitized_inputs["pooler_"], - ) - pooled_output_pair = self.get_pooled_output( - model_inputs=sanitized_inputs["remaining"]["encoding_pair"], - pooler_inputs=sanitized_inputs["pooler_pair_"], - ) - - logits = self.classifier(pooled_output, pooled_output_pair) - - result = {"logits": logits} - if targets is not None: - labels = targets["scores"] - loss = self.loss_fct(logits, labels) - result["loss"] = loss - if return_hidden_states: - raise NotImplementedError("return_hidden_states is not yet implemented") - - return SequenceClassifierOutput(**result) - - def decode(self, inputs: InputType, outputs: OutputType) -> TargetType: - # probabilities = torch.sigmoid(outputs.logits) - scores = outputs.logits - return {"scores": scores} diff --git a/src/pie_modules/models/simple_extractive_question_answering.py b/src/pie_modules/models/simple_extractive_question_answering.py deleted file mode 100644 index d54436515..000000000 --- 
@PyTorchIEModel.register()
class SimpleExtractiveQuestionAnsweringModel(
    PyTorchIEModel, RequiresModelNameOrPath, RequiresMaxInputLength
):
    """A PIE model for extractive question answering. It is a simple Pytorch-Lightning module that
    wraps around a question answering model from the Huggingface transformers library. The
    ExtractiveQuestionAnsweringTaskModule can be used create the input and target encodings as well
    as to decode the model output.

    Args:
        model_name_or_path: The name (Huggingface Hub model identifier) or local path of the model to use.
        max_input_length: The maximum length of the input sequence. Required for metric calculation.
        learning_rate: The learning rate to use for training. Defaults to 1e-5.
        warmup_proportion: The proportion of the training steps used to warm up the learning
            rate. If 0.0 (the default), no learning rate scheduler is created.
        **kwargs: Passed on to the parent class.
    """

    def __init__(
        self,
        model_name_or_path: str,
        max_input_length: int,
        learning_rate: float = 1e-5,
        warmup_proportion: float = 0.0,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.save_hyperparameters()

        self.learning_rate = learning_rate
        self.warmup_proportion = warmup_proportion
        self.max_input_length = max_input_length

        config = AutoConfig.from_pretrained(model_name_or_path)
        if self.is_from_pretrained:
            self.model = AutoModelForQuestionAnswering.from_config(config=config)
        else:
            self.model = AutoModelForQuestionAnswering.from_pretrained(
                model_name_or_path, config=config
            )

        # Separate metric objects per stage and per position type (start / end).
        # Every token position is treated as one class.
        self.f1_start: Dict[str, F1Score] = ModuleDict(
            {
                f"stage_{stage}": F1Score(task="multiclass", num_classes=max_input_length)
                for stage in [TRAINING, VALIDATION, TEST]
            }
        )
        self.f1_end: Dict[str, F1Score] = ModuleDict(
            {
                f"stage_{stage}": F1Score(task="multiclass", num_classes=max_input_length)
                for stage in [TRAINING, VALIDATION, TEST]
            }
        )

    def forward(self, inputs: BatchEncoding) -> ModelOutputType:
        return self.model(**inputs)

    def _update_and_log_f1(
        self, stage: str, name: str, metric: F1Score, logits: Tensor, positions: Tensor
    ) -> None:
        """Update `metric` with the position logits and log it as "{stage}/{name}"."""
        # We need to pad the logits to the max_input_length, otherwise the F1 metric complains
        # that the shape does not match the num_classes.
        logits_padded = functional.pad(
            logits, (0, self.max_input_length - logits.size(1)), value=float("-inf")
        )
        metric(logits_padded, positions)
        self.log(
            f"{stage}/{name}",
            metric,
            on_step=(stage == TRAINING),
            on_epoch=True,
            prog_bar=True,
        )

    def step(
        self,
        stage: str,
        batch: StepBatchEncoding,
    ) -> Tensor:
        """Run the model on a batch, log loss and F1 metrics, and return the loss.

        Args:
            stage: The current stage, i.e. one of TRAINING, VALIDATION or TEST.
            batch: A tuple of model inputs and targets. The targets have to contain
                "start_positions" and "end_positions".

        Returns:
            The loss computed by the wrapped model.

        Raises:
            ValueError: If the batch does not contain targets.
        """
        inputs, targets = batch
        if targets is None:
            raise ValueError("targets has to be available for training, but it is None")

        output = self({**inputs, **targets})

        loss = output.loss
        # show loss on each step only during training
        self.log(f"{stage}/loss", loss, on_step=(stage == TRAINING), on_epoch=True, prog_bar=True)

        # The start metric has to come from self.f1_start: taking it from self.f1_end
        # would mix start and end predictions in one metric and leave f1_start unused.
        f1_start = self.f1_start[f"stage_{stage}"]
        f1_end = self.f1_end[f"stage_{stage}"]
        self._update_and_log_f1(
            stage, "f1_start", f1_start, output.start_logits, targets["start_positions"]
        )
        self._update_and_log_f1(
            stage, "f1_end", f1_end, output.end_logits, targets["end_positions"]
        )

        # log f1 as simple average of start and end f1. we need to call compute() on the metric to get
        # the actual value, otherwise lightning complains that there is no model attribute with name "f1"
        f1_value = (f1_start.compute() + f1_end.compute()) / 2
        self.log(f"{stage}/f1", f1_value, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def training_step(self, batch: StepBatchEncoding, batch_idx: int) -> Tensor:
        return self.step(stage=TRAINING, batch=batch)

    def validation_step(self, batch: StepBatchEncoding, batch_idx: int) -> Tensor:
        return self.step(stage=VALIDATION, batch=batch)

    def test_step(self, batch: StepBatchEncoding, batch_idx: int) -> Tensor:
        return self.step(stage=TEST, batch=batch)

    def configure_optimizers(self) -> OptimizerLRScheduler:
        """Create an Adam optimizer and, if warmup is requested, a linear warmup schedule."""
        optimizer = Adam(self.parameters(), lr=self.learning_rate)

        if self.warmup_proportion > 0.0:
            stepping_batches = self.trainer.estimated_stepping_batches
            scheduler = get_linear_schedule_with_warmup(
                optimizer, int(stepping_batches * self.warmup_proportion), stepping_batches
            )
            return [optimizer], [{"scheduler": scheduler, "interval": "step"}]
        else:
            return optimizer
@PyTorchIEModel.register()
class SimpleGenerativeModel(
    ModelWithBoilerplate[InputType, OutputType, TargetType, StepOutputType],
):
    """This model is a simple wrapper around a generative model from Huggingface transformers. That
    means, its predict() and predict_step() methods will call the generate() method of the base
    model.

    If a taskmodule config is provided, the taskmodule will be instantiated and used to create metrics and
    a generation config with its configure_model_metric() and configure_model_generation() methods,
    respectively.

    If the base model has a configure_optimizer() method, this will be used to create the optimizer. Otherwise,
    the optimizer_type and learning_rate will be used to create an optimizer.

    Args:
        base_model: A dictionary describing the base model. Its "_type_" entry is the type of the
            base model, e.g. "transformers.AutoModelForSeq2SeqLM" (it should have a from_pretrained()
            method); all other entries are passed as keyword arguments to that method.
        base_model_type: Deprecated, use base_model. The type of the base model. Only used if
            base_model is not provided.
        base_model_config: Deprecated, use base_model. A dictionary with the keyword arguments that
            will be passed to the from_pretrained() method of the base model. Only used if
            base_model is not provided.
        override_generation_kwargs: The generation config for the base model. This will override the generation config
            from the taskmodule, if one is provided.
        optimizer_type: The type of the optimizer. If the base model has a configure_optimizer() method, this will be
            ignored.
        learning_rate: The learning rate for the optimizer. If the base model has a configure_optimizer() method,
            this has to be None.
        warmup_proportion: The proportion of the training steps that will be used for the warmup of the learning rate
            scheduler.
        scheduler_name: The name of the learning rate scheduler, passed to transformers.get_scheduler().
            If not set, no scheduler is used, unless warmup_proportion is greater than 0.0, in which
            case it defaults to "linear".
        scheduler_kwargs: Scheduler specific keyword arguments passed to transformers.get_scheduler().
        **kwargs: Additional keyword arguments that will be passed to the PyTorchIEModel constructor.
    """

    def __init__(
        self,
        # base model setup
        base_model: Optional[Dict[str, Any]] = None,
        # old setup
        base_model_type: Optional[str] = None,
        base_model_config: Optional[Dict[str, Any]] = None,
        # generation
        override_generation_kwargs: Optional[Dict[str, Any]] = None,
        # optimizer / schedular
        # important: the following entries (optimizer_type and learning_rate) are only used
        # if the base model does not have a configure_optimizer method!
        optimizer_type: Optional[Union[str, Type[Optimizer]]] = None,
        learning_rate: Optional[float] = None,
        warmup_proportion: float = 0.0,
        scheduler_name: Optional[Union[str, SchedulerType]] = None,
        scheduler_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        if base_model is None:
            if base_model_type is None:
                raise ValueError(
                    "Either base_model or base_model_type must be provided. If base_model is not provided, "
                    "base_model_type must be a valid model type, e.g. 'transformers.AutoModelForSeq2SeqLM'."
                )
            logger.warning(
                "The base_model_type and base_model_config arguments are deprecated. Please use base_model. "
                "You can use the following code to create the base_model argument: "
                "base_model = {'_type_': base_model_type, **base_model_config}"
            )
            base_model = {"_type_": base_model_type, **(base_model_config or {})}

        if scheduler_name is None and warmup_proportion > 0.0:
            logger.warning(
                "warmup_proportion is set to a value > 0.0, but scheduler_name is not set. "
                "Setting scheduler_name to 'linear' by default."
            )
            scheduler_name = "linear"

        # This has to come after base_model and scheduler_name were normalized above.
        self.save_hyperparameters(ignore=["base_model_type", "base_model_config"])

        # optimizer / scheduler
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type
        self.scheduler_name = scheduler_name
        self.warmup_proportion = warmup_proportion
        self.scheduler_kwargs = scheduler_kwargs or {}

        self.model = self.setup_base_model(config=base_model)
        self.generation_config = self.configure_generation(**(override_generation_kwargs or {}))

    def setup_base_model(self, config: Dict[str, Any]) -> PreTrainedModel:
        """Resolve the "_type_" entry of the config and call its from_pretrained() method
        with the remaining entries. The passed config is not modified."""
        config = copy.copy(config)
        resolved_base_model_type: Type[PreTrainedModel] = resolve_type(config.pop("_type_"))
        return resolved_base_model_type.from_pretrained(**config)

    def configure_generation(self, **kwargs) -> Dict[str, Any]:
        """Create the generation config: the config from the taskmodule (if one is
        available), updated with the passed keyword arguments."""
        if self.taskmodule is not None:
            # get the generation config from the taskmodule
            generation_config = self.taskmodule.configure_model_generation()
        else:
            logger.warning(
                "No taskmodule is available, so no generation config will be created. Consider "
                "setting taskmodule_config to a valid taskmodule config to use specific setup for generation."
            )
            generation_config = {}
        generation_config.update(kwargs)
        return generation_config

    def predict(self, inputs, **kwargs) -> TargetType:
        """Generate predictions with the generate() method of the base model.

        The model is switched to evaluation mode for the generation; if it was in
        training mode before, that mode is restored afterwards.

        Args:
            inputs: The model inputs, passed as keyword arguments to generate().
            **kwargs: Generation arguments that override the generation config.

        Returns:
            A dictionary with the generated ids (without the first position) as "labels".

        Raises:
            ValueError: If generate() does not return a tensor.
        """
        is_training = self.training
        self.eval()
        try:
            generation_kwargs = copy.deepcopy(self.generation_config)
            generation_kwargs.update(kwargs)
            outputs = self.model.generate(**inputs, **generation_kwargs)
        finally:
            # restore the previous mode even if the generation fails
            if is_training:
                self.train()

        # TODO: move into base model? or does this work for "all" generative models?
        # strip the bos_id
        if isinstance(outputs, torch.Tensor):
            return {"labels": outputs[:, 1:]}
        else:
            raise ValueError(f"Unsupported output type: {type(outputs)}")

    def forward(self, inputs: InputType, targets: Optional[TargetType] = None) -> OutputType:
        kwargs = {**inputs, **(targets or {})}
        return self.model(**kwargs)

    def decode(self, inputs: InputType, outputs: OutputType) -> TargetType:
        # construct prediction from the model output
        logits = outputs.logits
        # get the indices (these are without the initial bos_ids, see above)
        prediction = torch.argmax(logits, dim=-1)
        return {"labels": prediction.to(torch.long)}

    def configure_optimizers(self) -> OptimizerLRScheduler:
        """Create the optimizer and, if scheduler_name is set, a learning rate scheduler.

        Raises:
            ValueError: If the base model has a configure_optimizer method, but learning_rate
                is set, or if it has no such method and optimizer_type is not set.
        """
        if hasattr(self.model, "configure_optimizer") and callable(self.model.configure_optimizer):
            if self.learning_rate is not None:
                raise ValueError(
                    f"learning_rate is set to {self.learning_rate}, but the *base model* ({type(self.model)}) has a "
                    f"configure_optimizer method. Please set learning_rate to None and configure the optimizer "
                    f"inside the *base model*."
                )
            optimizer = self.model.configure_optimizer()
        else:
            logger.warning(
                f"The model does not have a configure_optimizer method. Creating an optimizer of "
                f"optimizer_type={self.optimizer_type} with the learning_rate={self.learning_rate} instead."
            )
            if self.optimizer_type is None:
                raise ValueError(
                    f"optimizer_type is None, but the *base model* ({type(self.model)}) does not have a "
                    f"configure_optimizer method. Please set the optimizer_type to a valid optimizer type, "
                    f"e.g. optimizer_type=torch.optim.Adam."
                )
            resolved_optimizer_type = resolve_type(
                self.optimizer_type, expected_super_type=Optimizer
            )
            optimizer = resolved_optimizer_type(self.parameters(), lr=self.learning_rate)

        if self.scheduler_name is not None:
            num_training_steps = self.trainer.estimated_stepping_batches
            num_warmup_steps = int(num_training_steps * self.warmup_proportion)
            scheduler = get_scheduler(
                name=self.scheduler_name,
                optimizer=optimizer,
                num_warmup_steps=num_warmup_steps,
                num_training_steps=num_training_steps,
                scheduler_specific_kwargs=self.scheduler_kwargs,
            )
            return [optimizer], [{"scheduler": scheduler, "interval": "step"}]
        else:
            return optimizer
-StepOutputType: TypeAlias = FloatTensor - - -logger = logging.getLogger(__name__) - - -@PyTorchIEModel.register() -class SimpleSequenceClassificationModel( - ModelWithBoilerplate[InputType, OutputType, TargetType, StepOutputType], - RequiresModelNameOrPath, - RequiresNumClasses, -): - """A simple sequence classification model. It wraps a HuggingFace - AutoModelForSequenceClassification and adds boilerplate code for training and inference. - - Args: - model_name_or_path: The name or path of the HuggingFace model to use. - num_classes: The number of classes for the classification task. - tokenizer_vocab_size: The size of the tokenizer vocabulary. If provided, the model's - tokenizer embeddings are resized to this size. - learning_rate: The learning rate for the optimizer. - task_learning_rate: The learning rate for the task-specific parameters. If None, the - learning rate for all parameters is set to `learning_rate`. - warmup_proportion: The proportion of steps to warm up the learning rate. - freeze_base_model: If True, the base model parameters are frozen. - base_model_prefix: The prefix of the base model parameters when using a task_learning_rate - or freeze_base_model. If None, the base_model_prefix of the model is used. - **kwargs: Additional keyword arguments passed to the parent class, - see :class:`ModelWithBoilerplate`. 
- """ - - def __init__( - self, - model_name_or_path: str, - num_classes: int, - tokenizer_vocab_size: Optional[int] = None, - learning_rate: float = 1e-5, - task_learning_rate: Optional[float] = None, - warmup_proportion: float = 0.1, - freeze_base_model: bool = False, - base_model_prefix: Optional[str] = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.save_hyperparameters() - - self.learning_rate = learning_rate - self.task_learning_rate = task_learning_rate - self.warmup_proportion = warmup_proportion - self.freeze_base_model = freeze_base_model - - config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_classes) - if self.is_from_pretrained: - self.model = AutoModelForSequenceClassification.from_config(config=config) - else: - self.model = AutoModelForSequenceClassification.from_pretrained( - model_name_or_path, config=config - ) - - self.base_model_prefix = base_model_prefix or self.model.base_model_prefix - - if tokenizer_vocab_size is not None: - self.model.resize_token_embeddings(tokenizer_vocab_size) - - if self.freeze_base_model: - for name, param in self.base_model_named_parameters(): - param.requires_grad = False - - def base_model_named_parameters(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - base_model: torch.nn.Module = getattr(self.model, self.base_model_prefix, None) - if base_model is None: - raise ValueError( - f"Base model with prefix '{self.base_model_prefix}' not found in {type(self.model).__name__}" - ) - if prefix: - prefix = f"{prefix}." 
- return base_model.named_parameters(prefix=f"{prefix}model.{self.base_model_prefix}") - - def task_named_parameters(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - base_model_parameter_names = dict(self.base_model_named_parameters(prefix=prefix)).keys() - for name, param in self.named_parameters(prefix=prefix): - if name not in base_model_parameter_names: - yield name, param - - def forward(self, inputs: InputType, targets: Optional[TargetType] = None) -> OutputType: - kwargs = {**inputs, **(targets or {})} - return self.model(**kwargs) - - def decode(self, inputs: InputType, outputs: OutputType) -> TargetType: - labels = torch.argmax(outputs.logits, dim=-1).to(torch.long) - probabilities = torch.softmax(outputs.logits, dim=-1) - return {"labels": labels, "probabilities": probabilities} - - def configure_optimizers(self): - if self.task_learning_rate is not None: - base_model_params = [param for name, param in self.base_model_named_parameters()] - task_params = [param for name, param in self.task_named_parameters()] - optimizer = AdamW( - [ - {"params": base_model_params, "lr": self.learning_rate}, - {"params": task_params, "lr": self.task_learning_rate}, - ] - ) - else: - optimizer = AdamW(self.parameters(), lr=self.learning_rate) - - if self.warmup_proportion > 0.0: - stepping_batches = self.trainer.estimated_stepping_batches - scheduler = get_linear_schedule_with_warmup( - optimizer, int(stepping_batches * self.warmup_proportion), stepping_batches - ) - return [optimizer], [{"scheduler": scheduler, "interval": "step"}] - else: - return optimizer diff --git a/src/pie_modules/models/simple_token_classification.py b/src/pie_modules/models/simple_token_classification.py deleted file mode 100644 index 7879aae04..000000000 --- a/src/pie_modules/models/simple_token_classification.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging -from typing import MutableMapping, Optional, Tuple, Union - -import torch -from pytorch_ie import PyTorchIEModel -from 
pytorch_ie.models.interface import RequiresModelNameOrPath, RequiresNumClasses -from pytorch_lightning.utilities.types import OptimizerLRScheduler -from torch import FloatTensor, LongTensor -from transformers import AutoConfig, AutoModelForTokenClassification, BatchEncoding -from transformers.modeling_outputs import TokenClassifierOutput -from typing_extensions import TypeAlias - -from pie_modules.models.common import ModelWithBoilerplate - -# model inputs / outputs / targets -InputType: TypeAlias = BatchEncoding -OutputType: TypeAlias = TokenClassifierOutput -TargetType: TypeAlias = MutableMapping[str, Union[LongTensor, FloatTensor]] -# step inputs (batch) / outputs (loss) -StepInputType: TypeAlias = Tuple[InputType, TargetType] -StepOutputType: TypeAlias = FloatTensor - - -logger = logging.getLogger(__name__) - - -@PyTorchIEModel.register() -class SimpleTokenClassificationModel( - ModelWithBoilerplate[InputType, OutputType, TargetType, StepOutputType], - RequiresModelNameOrPath, - RequiresNumClasses, -): - """A simple token classification model that wraps a (pretrained) model loaded with - AutoModelForTokenClassification from the transformers library. - - The model is trained with a cross-entropy loss function and uses the Adam optimizer. - - Note that for training, the labels for the special tokens (as well as for padding tokens) - are expected to have the value label_pad_id (-100 by default, which is the default ignore_index - value for the CrossEntropyLoss). The predictions for these tokens are also replaced with - label_pad_id to match the training labels for correct metric calculation. Therefore, the model - requires the special_tokens_mask and attention_mask (for padding) to be passed as inputs. - - Args: - model_name_or_path: The name or path of the pretrained transformer model to use. - num_classes: The number of classes to predict. - learning_rate: The learning rate to use for training. 
- label_pad_id: The label id to use for padding labels (at the padding token positions - as well as for the special tokens). - """ - - def __init__( - self, - model_name_or_path: str, - num_classes: int, - learning_rate: float = 1e-5, - label_pad_id: int = -100, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.save_hyperparameters() - - self.learning_rate = learning_rate - self.label_pad_id = label_pad_id - self.num_classes = num_classes - - config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_classes) - if self.is_from_pretrained: - self.model = AutoModelForTokenClassification.from_config(config=config) - else: - self.model = AutoModelForTokenClassification.from_pretrained( - model_name_or_path, config=config - ) - - def forward(self, inputs: InputType, targets: Optional[TargetType] = None) -> OutputType: - inputs_without_special_tokens_mask = { - k: v for k, v in inputs.items() if k != "special_tokens_mask" - } - return self.model(**inputs_without_special_tokens_mask, **(targets or {})) - - def decode(self, inputs: InputType, outputs: OutputType) -> TargetType: - # get the max index for each token from the logits - tags_tensor = torch.argmax(outputs.logits, dim=-1).to(torch.long) - - # mask out the padding and special tokens - tags_tensor = tags_tensor.masked_fill(inputs["attention_mask"] == 0, self.label_pad_id) - - # mask out the special tokens - tags_tensor = tags_tensor.masked_fill( - inputs["special_tokens_mask"] == 1, self.label_pad_id - ) - probabilities = torch.softmax(outputs.logits, dim=-1) - - return {"labels": tags_tensor, "probabilities": probabilities} - - def configure_optimizers(self) -> OptimizerLRScheduler: - return torch.optim.Adam(self.parameters(), lr=self.learning_rate) diff --git a/src/pie_modules/models/span_tuple_classification.py b/src/pie_modules/models/span_tuple_classification.py deleted file mode 100644 index bbdbf4938..000000000 --- a/src/pie_modules/models/span_tuple_classification.py +++ /dev/null @@ 
-1,457 +0,0 @@ -import logging -from dataclasses import dataclass -from typing import Iterator, List, MutableMapping, Optional, Tuple, TypeVar, Union - -import torch -from pytorch_ie import PyTorchIEModel -from pytorch_ie.models.interface import RequiresModelNameOrPath, RequiresNumClasses -from torch import BoolTensor, FloatTensor, LongTensor, Tensor, nn -from torch.nn import Dropout, Parameter -from torch.optim import AdamW -from transformers import AutoConfig, AutoModel, get_linear_schedule_with_warmup -from transformers.utils import ModelOutput -from typing_extensions import TypeAlias - -from .common import ModelWithBoilerplate - - -class MLP(nn.Module): - def __init__(self, n_in, n_out, dropout=0, activation=nn.GELU()): - super().__init__() - self.linear = nn.Linear(n_in, n_out) - self.f = activation - self.dropout = Dropout(p=dropout) - self.reset_parameters() - - def reset_parameters(self): - nn.init.xavier_normal_(self.linear.weight) - nn.init.zeros_(self.linear.bias) - - def forward(self, x): - x = self.f(self.linear(x)) - x = self.dropout(x) - return x - - -@dataclass -class SpanPairClassifierOutput(ModelOutput): - """Base class for outputs of span pair classification models. - - Args: - loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided) : - Classification loss. - logits (`torch.FloatTensor` of shape `(num_valid_input_pairs_in_batch, config.num_labels)`): - Classification scores (before SoftMax). - last_hidden_state (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*): - The last hidden state of the transformer model. Returned if `return_embeddings=True`. - span_embeddings (`torch.FloatTensor` of shape `(batch_size, num_spans, span_embedding_dim)`, *optional*): - The embeddings of the spans. Returned if `return_embeddings=True`. - tuple_embeddings (`torch.FloatTensor` of shape `(num_valid_input_pairs_in_batch, tuple_embedding_dim)`, *optional*): - The embeddings of the tuples. 
Returned if `return_embeddings=True`. - """ - - loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - last_hidden_state: Optional[torch.FloatTensor] = None - span_embeddings: Optional[torch.FloatTensor] = None - tuple_embeddings: Optional[torch.FloatTensor] = None - - -# model inputs / outputs / targets -InputType: TypeAlias = MutableMapping[str, LongTensor] -OutputType: TypeAlias = SpanPairClassifierOutput -TargetType: TypeAlias = MutableMapping[str, Union[LongTensor, FloatTensor]] -# step inputs (batch) / outputs (loss) -StepInputType: TypeAlias = Tuple[InputType, TargetType] -StepOutputType: TypeAlias = FloatTensor - - -HF_MODEL_TYPE_TO_CLASSIFIER_DROPOUT_ATTRIBUTE = { - "albert": "classifier_dropout_prob", - "distilbert": "seq_classif_dropout", -} - -logger = logging.getLogger(__name__) - -T = TypeVar("T", bound=Tensor) - - -def get_embeddings_at_indices(embeddings: T, indices: LongTensor) -> T: - # embeddings: (bs, seq_len, hidden_size) - # indices: (bs, num_indices) - hidden_size = embeddings.size(-1) - # Expand dimensions of start_marker_positions to match hidden_states - indices_expanded = indices.unsqueeze(-1).expand(-1, -1, hidden_size) - # result: (bs, num_indices, hidden_size) - result = embeddings.gather(1, indices_expanded) - return result - - -@PyTorchIEModel.register() -class SpanTupleClassificationModel( - ModelWithBoilerplate[InputType, OutputType, TargetType, StepOutputType], - RequiresModelNameOrPath, - RequiresNumClasses, -): - """A span tuple classification model that uses a pooler to get a representation of the input - spans and then applies a linear classifier to that representation. The pooler can be configured - via the `span_embedding_mode` and `tuple_embedding_mode` arguments. It expects the input to - contain the indices of the start and end tokens of the spans (for the span pooler) and the - indices of the spans in the tuples to classify (for the tuple pooler). 
- - Args: - model_name_or_path: The name or path of the HuggingFace model to use. - num_classes: The number of classes for the classification task. - span_embedding_mode: The mode to pool the hidden states for the spans. One of "start_token", - "end_token", "start_and_end_token". - tuple_embedding_mode: The mode to pool the span embeddings for the tuples. Possible values are - "concat" (concatenate the embeddings of the tuple entries), "multiply2_and_concat" - (multiply the embeddings of the first two entries and concatenate them with the - embeddings of the first two entries) and "index_{idx}" (use the embedding of the entry - at index {idx} as the tuple embedding). Note that "multiply2_and_concat" requires - `num_tuple_entries=2`. Default: "multiply2_and_concat". - num_tuple_entries: The number of entries in the tuples. - tuple_entry_hidden_dim: If provided, the tuple entries (i.e. the span embeddings at the tuple indices) - are mapped to this dimensionality before combining them. Default: 768. - tokenizer_vocab_size: The size of the tokenizer vocabulary. If provided, the model's - tokenizer embeddings are resized to this size. - classifier_dropout: The dropout probability for the classifier. If not provided, the - dropout probability is taken from the Huggingface model config. - learning_rate: The learning rate for the optimizer. - task_learning_rate: The learning rate for the task-specific parameters. If None, the - learning rate for all parameters is set to `learning_rate`. - warmup_proportion: The proportion of steps to warm up the learning rate. - multi_label: If True, the model is trained as a multi-label classifier. - multi_label_threshold: The threshold for the multi-label classifier, i.e. the probability - above which a class is predicted. - freeze_base_model: If True, the base model parameters are frozen. - label_pad_value: The padding value for the labels. - probability_pad_value: The padding value for the probabilities. 
- **kwargs: Additional keyword arguments passed to the parent class, - see :class:`ModelWithBoilerplate`. - """ - - def __init__( - self, - model_name_or_path: str, - num_classes: int, - span_embedding_mode: str = "start_and_end_token", - tuple_embedding_mode: str = "multiply2_and_concat", - num_tuple_entries: int = 2, - tuple_entry_hidden_dim: Optional[int] = 768, - tokenizer_vocab_size: Optional[int] = None, - classifier_dropout: Optional[float] = None, - learning_rate: float = 1e-5, - task_learning_rate: Optional[float] = None, - warmup_proportion: float = 0.1, - multi_label: bool = False, - multi_label_threshold: float = 0.5, - freeze_base_model: bool = False, - label_pad_value: int = -100, - probability_pad_value: float = -1.0, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.save_hyperparameters() - - self.learning_rate = learning_rate - self.task_learning_rate = task_learning_rate - self.warmup_proportion = warmup_proportion - self.freeze_base_model = freeze_base_model - self.label_pad_value = label_pad_value - self.probability_pad_value = probability_pad_value - - config = AutoConfig.from_pretrained(model_name_or_path) - if self.is_from_pretrained: - self.model = AutoModel.from_config(config=config) - else: - self.model = AutoModel.from_pretrained(model_name_or_path, config=config) - - if tokenizer_vocab_size is not None: - self.model.resize_token_embeddings(tokenizer_vocab_size) - - if self.freeze_base_model: - for param in self.model.parameters(): - param.requires_grad = False - - if classifier_dropout is None: - # Get the classifier dropout value from the Huggingface model config. - # This is a bit of a mess since some Configs use different variable names or change the semantics - # of the dropout (e.g. DistilBert has one dropout prob for QA and one for Seq classification, and a - # general one for embeddings, encoder and pooler). 
- classifier_dropout_attr = HF_MODEL_TYPE_TO_CLASSIFIER_DROPOUT_ATTRIBUTE.get( - config.model_type, "classifier_dropout" - ) - classifier_dropout = getattr(config, classifier_dropout_attr) or 0.0 - self.dropout = nn.Dropout(classifier_dropout) - - # embedder for the spans - self.span_embedding_mode = span_embedding_mode - if self.span_embedding_mode in ["start_token", "end_token"]: - self.span_embedding_dim = self.model.config.hidden_size - elif self.span_embedding_mode in ["start_and_end_token"]: - self.span_embedding_dim = self.model.config.hidden_size * 2 - else: - raise ValueError(f"Invalid value for span_embedding_mode: {self.span_embedding_mode}") - - # embedder for the tuples - self.num_tuple_entries = num_tuple_entries - self.tuple_entry_hidden_dim = tuple_entry_hidden_dim - if self.tuple_entry_hidden_dim is not None: - self.tuple_entry_embedders = nn.ModuleList( - [ - MLP(self.span_embedding_dim, self.tuple_entry_hidden_dim) - for _ in range(num_tuple_entries) - ] - ) - tuple_entry_dim = self.tuple_entry_hidden_dim - else: - self.tuple_entry_embedders = None - tuple_entry_dim = self.span_embedding_dim - self.tuple_embedding_mode = tuple_embedding_mode - if self.tuple_embedding_mode == "concat": - tuple_embedding_dim = tuple_entry_dim * self.num_tuple_entries - elif self.tuple_embedding_mode == "multiply2_and_concat": - if self.num_tuple_entries != 2: - raise ValueError( - "tuple_embedding_mode='multiply2_and_concat' requires num_tuple_entries=2" - ) - tuple_embedding_dim = tuple_entry_dim * 3 - elif self.tuple_embedding_mode.startswith("index_"): - idx = int(self.tuple_embedding_mode.split("_")[1]) - if idx >= self.num_tuple_entries: - raise ValueError( - f"Invalid index IDX={idx} for tuple_embedding_mode='index_IDX'. 
" - f"Number of entries in tuple: {self.num_tuple_entries}" - ) - tuple_embedding_dim = tuple_entry_dim - else: - raise ValueError( - f"Invalid value for tuple_embedding_mode: {self.tuple_embedding_mode}" - ) - - # classifier - # TODO: do sth more sophisticated here - self.classifier = nn.Linear(tuple_embedding_dim, num_classes) - - self.multi_label = multi_label - self.multi_label_threshold = multi_label_threshold - self.loss_fct = nn.BCEWithLogitsLoss() if self.multi_label else nn.CrossEntropyLoss() - - def span_embedder( - self, - hidden_state: FloatTensor, - span_start_indices: LongTensor, - span_end_indices: LongTensor, - ) -> FloatTensor: - """Create the span embeddings from the hidden states and the span start and end indices. - - Args: - hidden_state: The last hidden state from the transformer model. shape: (batch_size, seq_len, hidden_size) - span_start_indices: The indices of the start tokens of the spans. shape: (batch_size, num_spans) - span_end_indices: The indices of the end tokens of the spans. shape: (batch_size, num_spans) - - Returns: - The pooled span embeddings. shape: (batch_size, num_spans, hidden_size) - """ - - if self.span_embedding_mode == "start_token": - span_embeddings = get_embeddings_at_indices(hidden_state, span_start_indices) - elif self.span_embedding_mode == "end_token": - span_embeddings = get_embeddings_at_indices(hidden_state, span_end_indices) - elif self.span_embedding_mode == "start_and_end_token": - span_embeddings = torch.cat( - [ - get_embeddings_at_indices(hidden_state, span_start_indices), - get_embeddings_at_indices(hidden_state, span_end_indices), - ], - dim=-1, - ) - else: - raise ValueError(f"Invalid value for span_embedding_mode: {self.span_embedding_mode}") - - return span_embeddings - - def tuple_embedder( - self, - span_embeddings: FloatTensor, - tuple_indices: LongTensor, - tuple_indices_mask: BoolTensor, - ) -> FloatTensor: - """Create the tuple embeddings from the span embeddings and the tuple indices. 
- - Args: - span_embeddings: The span embeddings. shape: (batch_size, num_spans, span_embedding_size) - tuple_indices: The indices of the spans in the tuples. shape: (batch_size, num_tuples, num_tuple_entries) - tuple_indices_mask: A mask indicating which tuples are valid. shape: (batch_size, num_tuples) - - Returns: - The pooled tuple embeddings. shape: (num_tuples_in_batch, num_tuple_entries * span_embedding_size) - """ - - if not tuple_indices.shape[-1] == self.num_tuple_entries: - raise ValueError( - f"Number of entries in tuple_indices should be equal to num_tuple_entries={self.num_tuple_entries}" - ) - batch_size, max_num_spans = span_embeddings.shape[:2] - # we need to add the batch offsets to the tuple indices to get the correct indices in the - # flattened span_embeddings - batch_offsets = ( - torch.arange(batch_size, device=tuple_indices.device).unsqueeze(-1).unsqueeze(-1) - * max_num_spans - ) - tuple_indices_with_offsets = tuple_indices + batch_offsets - # shape: (num_tuples_in_batch, num_entries) - valid_tuple_indices_flat = tuple_indices_with_offsets[tuple_indices_mask] - - # we need to flatten the span_embeddings to get the embeddings at the tuple indices - # shape: (batch_size * num_spans, span_embedding_size) - span_embeddings_flat = span_embeddings.view(-1, span_embeddings.size(-1)) - - # map the span embeddings individually for each tuple entry - # each entry has the shape: (batch_size * num_spans, tuple_entry_dim) - if self.tuple_entry_embedders is not None: - span_embeddings_mapped = [ - mlp(span_embeddings_flat) for mlp in self.tuple_entry_embedders - ] - else: - span_embeddings_mapped = [span_embeddings_flat] * self.num_tuple_entries - - tuple_embeddings_list: List[FloatTensor] = [] - for i in range(self.num_tuple_entries): - # shape: (num_tuples_in_batch) - current_tuple_indices = valid_tuple_indices_flat[:, i] - # get the embeddings that were mapped with the mlp for the current entry - # shape: (batch_size * num_spans, tuple_entry_dim) - 
span_embeddings_mapped_for_entry = span_embeddings_mapped[i] - # shape: (num_tuples_in_batch, tuple_entry_dim) - current_embeddings = span_embeddings_mapped_for_entry[current_tuple_indices] - tuple_embeddings_list.append(current_embeddings) - if self.tuple_embedding_mode == "concat": - tuple_embeddings = torch.cat(tuple_embeddings_list, dim=-1).to(span_embeddings.dtype) - elif self.tuple_embedding_mode == "multiply2_and_concat": - tuple_embeddings = torch.cat( - [ - tuple_embeddings_list[0] * tuple_embeddings_list[1], - tuple_embeddings_list[0], - tuple_embeddings_list[1], - ], - dim=-1, - ) - elif self.tuple_embedding_mode.startswith("index_"): - index = int(self.tuple_embedding_mode.split("_")[1]) - tuple_embeddings = tuple_embeddings_list[index] - else: - raise ValueError( - f"Invalid value for tuple_embedding_mode: {self.tuple_embedding_mode}" - ) - return tuple_embeddings - - def forward( - self, - inputs: InputType, - targets: Optional[TargetType] = None, - return_embeddings: bool = False, - ) -> OutputType: - span_embedder_inputs = {} - tuple_embedder_inputs = {} - base_model_inputs = {} - for k, v in inputs.items(): - if k.startswith("span_"): - span_embedder_inputs[k] = v - elif k.startswith("tuple_"): - tuple_embedder_inputs[k] = v - else: - base_model_inputs[k] = v - - output = self.model(**base_model_inputs) - last_hidden_state = self.dropout(output.last_hidden_state) - - # get the span embeddings from the hidden states and the start and end marker positions - span_embeddings = self.span_embedder( - hidden_state=last_hidden_state, **span_embedder_inputs - ) - # get the tuple embeddings from the span embeddings and the tuple indices - # Note that this flattens the batch dimension to not compute embeddings for padding tuples! 
- tuple_embeddings_flat = self.tuple_embedder( - span_embeddings=span_embeddings, **tuple_embedder_inputs - ) - - logits_valid = self.classifier(tuple_embeddings_flat) - - result = {"logits": logits_valid} - if targets is not None: - labels = targets["labels"] - mask = inputs["tuple_indices_mask"] - valid_labels = labels[mask] - loss = self.loss_fct(logits_valid, valid_labels) - result["loss"] = loss - - if return_embeddings: - result["last_hidden_state"] = last_hidden_state - result["tuple_embeddings"] = tuple_embeddings_flat - result["span_embeddings"] = span_embeddings - - return SpanPairClassifierOutput(**result) - - def decode(self, inputs: InputType, outputs: OutputType) -> TargetType: - if not self.multi_label: - labels_flat = torch.argmax(outputs.logits, dim=-1).to(torch.long) - probabilities_flat = torch.softmax(outputs.logits, dim=-1) - else: - probabilities_flat = torch.sigmoid(outputs.logits) - labels_flat = (probabilities_flat > self.multi_label_threshold).to(torch.long) - - # re-construct the original shape - mask = inputs["tuple_indices_mask"] - # create "empty" labels and probabilities tensors - labels = ( - torch.ones(mask.shape, dtype=torch.long, device=labels_flat.device) - * self.label_pad_value - ) - prob_shape = list(mask.shape) + [probabilities_flat.shape[-1]] - probabilities = ( - torch.ones(prob_shape, dtype=torch.float, device=probabilities_flat.device) - * self.probability_pad_value - ) - # fill in the valid values - labels[mask] = labels_flat - probabilities[mask] = probabilities_flat - - return {"labels": labels, "probabilities": probabilities} - - def base_model_named_parameters(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - if prefix: - prefix = f"{prefix}." - return self.model.named_parameters(prefix=f"{prefix}model") - - def task_named_parameters(self, prefix: str = "") -> Iterator[Tuple[str, Parameter]]: - if prefix: - prefix = f"{prefix}." 
- base_model_parameter_names = dict(self.base_model_named_parameters(prefix=prefix)).keys() - for name, param in self.named_parameters(prefix=prefix): - if name not in base_model_parameter_names: - yield name, param - - def configure_optimizers(self): - if self.task_learning_rate is not None: - base_model_params = (param for name, param in self.base_model_named_parameters()) - task_params = (param for name, param in self.task_named_parameters()) - optimizer = AdamW( - [ - {"params": base_model_params, "lr": self.learning_rate}, - {"params": task_params, "lr": self.task_learning_rate}, - ] - ) - else: - optimizer = AdamW(self.parameters(), lr=self.learning_rate) - - if self.warmup_proportion > 0.0: - stepping_batches = self.trainer.estimated_stepping_batches - scheduler = get_linear_schedule_with_warmup( - optimizer, int(stepping_batches * self.warmup_proportion), stepping_batches - ) - return [optimizer], [{"scheduler": scheduler, "interval": "step"}] - else: - return optimizer diff --git a/src/pie_modules/models/token_classification_with_seq2seq_encoder_and_crf.py b/src/pie_modules/models/token_classification_with_seq2seq_encoder_and_crf.py deleted file mode 100644 index e5b0b6e16..000000000 --- a/src/pie_modules/models/token_classification_with_seq2seq_encoder_and_crf.py +++ /dev/null @@ -1,247 +0,0 @@ -import logging -from typing import Any, Dict, MutableMapping, Optional, Tuple, Union - -import torch -from pytorch_ie import PyTorchIEModel -from pytorch_ie.models.interface import RequiresModelNameOrPath, RequiresNumClasses -from pytorch_lightning.utilities.types import OptimizerLRScheduler -from torch import FloatTensor, LongTensor, nn -from transformers import ( - AutoConfig, - AutoModel, - BatchEncoding, - get_linear_schedule_with_warmup, -) -from transformers.modeling_outputs import TokenClassifierOutput -from typing_extensions import TypeAlias - -from pie_modules.models.common import ModelWithBoilerplate -from pie_modules.models.components.seq2seq_encoder 
import build_seq2seq_encoder - -# model inputs / outputs / targets -InputType: TypeAlias = BatchEncoding -OutputType: TypeAlias = TokenClassifierOutput -TargetType: TypeAlias = MutableMapping[str, Union[LongTensor, FloatTensor]] -# step inputs (batch) / outputs (loss) -StepInputType: TypeAlias = Tuple[InputType, TargetType] -StepOutputType: TypeAlias = FloatTensor - -HF_MODEL_TYPE_TO_CLASSIFIER_DROPOUT_ATTRIBUTE = { - "bert": "hidden_dropout_prob", - "roberta": "hidden_dropout_prob", - "albert": "classifier_dropout_prob", - "distilbert": "seq_classif_dropout", - "deberta-v2": "hidden_dropout_prob", - "longformer": "hidden_dropout_prob", -} - -logger = logging.getLogger(__name__) - - -@PyTorchIEModel.register() -class TokenClassificationModelWithSeq2SeqEncoderAndCrf( - ModelWithBoilerplate[InputType, OutputType, TargetType, StepOutputType], - RequiresNumClasses, - RequiresModelNameOrPath, -): - """A token classification model that wraps a (pretrained) model loaded with AutoModel from the - transformers library. The model can optionally be followed by a seq2seq encoder (e.g. an LSTM). - Finally, Conditional Random Fields (CRFs) can be used to decode the predictions. - - The model is trained with a cross-entropy loss function and uses the Adam optimizer. - - Note that for training, the labels for the special tokens (as well as for padding tokens) - are expected to have the value label_pad_id (-100 by default, which is the default ignore_index - value for the CrossEntropyLoss). The predictions for these tokens are also replaced with - label_pad_id to match the training labels for correct metric calculation. Therefore, the model - requires the special_tokens_mask and attention_mask (for padding) to be passed as inputs. - - Args: - model_name_or_path: The name or path of the (pretrained) transformer model to use. - num_classes: The number of classes to predict. - learning_rate: The learning rate to use for training. 
- task_learning_rate: The learning rate to use for the task-specific parameters, i.e. - for the sequence-to-sequence encoder, classification head, and CRF. If None, the - learning_rate is used for all parameters. - use_crf: Whether to use a CRF to decode the predictions. - label_pad_id: The label id to use for padding labels (at the padding token positions - as well as for the special tokens). - special_token_label_id: The label id to use for special tokens (e.g. [CLS], [SEP]). This - is used to replace the targets for special tokens with the label_pad_id before passing - them to the CRF because the CRF does not allow the first token to be masked out. - classifier_dropout: The dropout probability to use for the classification head. - freeze_base_model: Whether to freeze the base model (i.e. the transformer) during training. - warmup_proportion: The proportion of training steps to use for the linear warmup. - seq2seq_encoder: A dictionary with the configuration for the seq2seq encoder. If None, no - seq2seq encoder is used. See ./components/seq2seq_encoder.py for further information. 
- """ - - def __init__( - self, - model_name_or_path: str, - num_classes: int, - learning_rate: float = 1e-5, - task_learning_rate: Optional[float] = None, - use_crf: bool = True, - label_pad_id: int = -100, - special_token_label_id: int = 0, - classifier_dropout: Optional[float] = None, - freeze_base_model: bool = False, - warmup_proportion: float = 0.1, - seq2seq_encoder: Optional[Dict[str, Any]] = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.save_hyperparameters() - - self.special_token_label_id = special_token_label_id - - self.learning_rate = learning_rate - self.warmup_proportion = warmup_proportion - self.task_learning_rate = task_learning_rate - self.label_pad_id = label_pad_id - self.num_classes = num_classes - - config = AutoConfig.from_pretrained(model_name_or_path) - if self.is_from_pretrained: - self.model = AutoModel.from_config(config=config) - else: - self.model = AutoModel.from_pretrained(model_name_or_path, config=config) - - if freeze_base_model: - self.model.requires_grad_(False) - - hidden_size = config.hidden_size - self.seq2seq_encoder = None - if seq2seq_encoder is not None: - self.seq2seq_encoder, hidden_size = build_seq2seq_encoder( - config=seq2seq_encoder, input_size=hidden_size - ) - - if classifier_dropout is None: - # Get the classifier dropout value from the Huggingface model config. - # This is a bit of a mess since some Configs use different variable names or change the semantics - # of the dropout (e.g. DistilBert has one dropout prob for QA and one for Seq classification, and a - # general one for embeddings, encoder and pooler). 
- classifier_dropout_attr = HF_MODEL_TYPE_TO_CLASSIFIER_DROPOUT_ATTRIBUTE.get( - config.model_type, "classifier_dropout" - ) - if hasattr(config, classifier_dropout_attr): - classifier_dropout = getattr(config, classifier_dropout_attr) - else: - raise ValueError( - f"The config {type(config),__name__} loaded from {model_name_or_path} has no attribute " - f"{classifier_dropout_attr}" - ) - self.dropout = nn.Dropout(classifier_dropout) - - self.classifier = nn.Linear(hidden_size, num_classes) - - if use_crf: - try: - from torchcrf import CRF - except ImportError: - raise ImportError( - "To use CRFs, the torchcrf package must be installed. " - "You can install it with `pip install pytorch-crf`." - ) - - self.crf = CRF(num_tags=num_classes, batch_first=True) - else: - self.crf = None - - def decode(self, inputs: InputType, outputs: OutputType) -> TargetType: - result = {} - logits = outputs.logits - attention_mask = inputs["attention_mask"] - special_tokens_mask = inputs["special_tokens_mask"] - attention_mask_bool = attention_mask.to(torch.bool) - if self.crf is not None: - decoded_tags = self.crf.decode(emissions=logits, mask=attention_mask_bool) - # pad the decoded tags to the length of the logits to have the same shape as when not using the crf - seq_len = logits.shape[1] - padded_tags = [ - tags + [self.label_pad_id] * (seq_len - len(tags)) for tags in decoded_tags - ] - tags_tensor = torch.tensor(padded_tags, device=logits.device).to(torch.long) - else: - # get the max index for each token from the logits - tags_tensor = torch.argmax(logits, dim=-1).to(torch.long) - # set the padding and special tokens to the label_pad_id - mask = attention_mask_bool & ~special_tokens_mask.to(torch.bool) - tags_tensor = tags_tensor.masked_fill(~mask, self.label_pad_id) - - result["labels"] = tags_tensor - # TODO: is it correct to use this also in the case of the crf? 
- result["probabilities"] = torch.softmax(logits, dim=-1) - - return result - - def forward( - self, inputs: InputType, targets: Optional[TargetType] = None - ) -> TokenClassifierOutput: - inputs_without_special_tokens_mask = { - k: v for k, v in inputs.items() if k != "special_tokens_mask" - } - outputs = self.model(**inputs_without_special_tokens_mask) - sequence_output = outputs[0] - - if self.seq2seq_encoder is not None: - sequence_output = self.seq2seq_encoder(sequence_output) - - sequence_output = self.dropout(sequence_output) - logits = self.classifier(sequence_output) - - loss = None - if targets is not None: - labels = targets["labels"] - if self.crf is not None: - # Overwrite the padding labels with ignore_index. Note that this is different from the - # attention_mask, because the attention_mask includes special tokens, whereas the labels - # are set to label_pad_id also for special tokens (e.g. [CLS]). We need handle all - # occurrences of label_pad_id because usually that index is out of range with respect to - # the number of logits in which case the crf would complain. However, we can not simply - # pass a mask to the crf that also masks out the special tokens, because the crf does not - # allow the first token to be masked out. 
- mask_pad_or_special = labels == self.label_pad_id - labels_valid = labels.masked_fill(mask_pad_or_special, self.special_token_label_id) - # the crf expects a bool mask - if "attention_mask" in inputs: - mask_bool = inputs["attention_mask"].to(torch.bool) - else: - mask_bool = None - log_likelihood = self.crf(emissions=logits, tags=labels_valid, mask=mask_bool) - loss = -log_likelihood - else: - loss_fct = nn.CrossEntropyLoss(ignore_index=self.label_pad_id) - loss = loss_fct(logits.view(-1, self.num_classes), labels.view(-1)) - - return TokenClassifierOutput( - loss=loss, - logits=logits, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - ) - - def configure_optimizers(self) -> OptimizerLRScheduler: - if self.task_learning_rate is not None: - all_params = dict(self.named_parameters()) - base_model_params = dict(self.model.named_parameters(prefix="model")) - task_params = {k: v for k, v in all_params.items() if k not in base_model_params} - optimizer = torch.optim.AdamW( - [ - {"params": base_model_params.values(), "lr": self.learning_rate}, - {"params": task_params.values(), "lr": self.task_learning_rate}, - ] - ) - else: - optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate) - - if self.warmup_proportion > 0.0: - stepping_batches = self.trainer.estimated_stepping_batches - scheduler = get_linear_schedule_with_warmup( - optimizer, int(stepping_batches * self.warmup_proportion), stepping_batches - ) - return [optimizer], [{"scheduler": scheduler, "interval": "step"}] - else: - return optimizer diff --git a/src/pie_modules/taskmodules/__init__.py b/src/pie_modules/taskmodules/__init__.py deleted file mode 100644 index 46d3766ae..000000000 --- a/src/pie_modules/taskmodules/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .cross_text_binary_coref import CrossTextBinaryCorefTaskModule -from .extractive_question_answering import ExtractiveQuestionAnsweringTaskModule -from .labeled_span_extraction_by_token_classification import ( 
- LabeledSpanExtractionByTokenClassificationTaskModule, -) -from .pointer_network_for_end2end_re import PointerNetworkTaskModuleForEnd2EndRE -from .re_span_pair_classification import RESpanPairClassificationTaskModule -from .re_text_classification_with_indices import ( - RETextClassificationWithIndicesTaskModule, -) -from .text_to_text import TextToTextTaskModule diff --git a/src/pie_modules/taskmodules/common/__init__.py b/src/pie_modules/taskmodules/common/__init__.py index e95a3f006..e69de29bb 100644 --- a/src/pie_modules/taskmodules/common/__init__.py +++ b/src/pie_modules/taskmodules/common/__init__.py @@ -1,4 +0,0 @@ -from .interfaces import AnnotationEncoderDecoder, DecodingException -from .mixins import BatchableMixin, RelationStatisticsMixin, StatisticsMixin -from .taskmodule_with_document_converter import TaskModuleWithDocumentConverter -from .utils import get_first_occurrence_index diff --git a/src/pie_modules/taskmodules/common/interfaces.py b/src/pie_modules/taskmodules/common/interfaces.py deleted file mode 100644 index 388b60ae9..000000000 --- a/src/pie_modules/taskmodules/common/interfaces.py +++ /dev/null @@ -1,63 +0,0 @@ -import abc -from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar - -from pie_core import Annotation - -# Annotation Encoding type: encoding for a single annotation -AE = TypeVar("AE") -# Annotation type -A = TypeVar("A", bound=Annotation) -# Annotation Collection Encoding type: encoding for a collection of annotations, -# e.g. 
all relevant annotations for a document -ACE = TypeVar("ACE") - - -class DecodingException(Exception, Generic[AE], abc.ABC): - """Exception raised when decoding fails.""" - - identifier: str - - def __init__(self, message: str, encoding: AE): - self.message = message - self.encoding = encoding - - -class AnnotationEncoderDecoder(abc.ABC, Generic[A, AE]): - """Base class for annotation encoders and decoders.""" - - @abc.abstractmethod - def encode(self, annotation: A, metadata: Optional[Dict[str, Any]] = None) -> AE: - pass - - @abc.abstractmethod - def decode(self, encoding: AE, metadata: Optional[Dict[str, Any]] = None) -> A: - pass - - def build_decoding_constraints( - self, partial_encoding: AE - ) -> Tuple[Optional[Any], Optional[Any]]: - """Given a partial encoding, build the constraints for the next encoding step. - - Returns: - - A tuple of two elements: - - The first element is a set of positive constraints for the decoder. - - The second element is a set of negative constraints for the decoder. - """ - raise NotImplementedError( - "build_decoder_constraints is not implemented for this encoder/decoder." - ) - - def parse(self, encoding: AE) -> Tuple[List[A], Dict[str, int], AE]: - """Parse the encoding and return a list of annotations. This should be error tolerant and - return all annotations that can be parsed and the remaining encoding. - - Args: - encoding: The encoding to parse. Can be incomplete. - - Returns: - - A tuple of three elements: - - A list of encoded annotations. - - A dictionary mapping error types to their counts. - - The remaining encoding after parsing. 
- """ - raise NotImplementedError("parse is not implemented for this encoder/decoder.") diff --git a/src/pie_modules/taskmodules/common/mixins.py b/src/pie_modules/taskmodules/common/mixins.py deleted file mode 100644 index 0e2909893..000000000 --- a/src/pie_modules/taskmodules/common/mixins.py +++ /dev/null @@ -1,297 +0,0 @@ -import dataclasses -import logging -from abc import ABC, abstractmethod -from collections import defaultdict -from typing import Any, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar - -import pandas as pd -import torch -import torch.nn.functional as F -from pie_core import Annotation -from torch import Tensor - -logger = logging.getLogger(__name__) - - -def _pad_tensor(tensor: Tensor, target_shape: List[int], pad_value: float) -> Tensor: - """Pad a tensor to a target shape. - - Args: - tensor: The tensor to pad. - target_shape: The target shape. - pad_value: The value to use for padding. - - Returns: The padded tensor. - """ - - shape = tensor.shape - pad: List[int] = [] - for i, s in enumerate(shape): - pad = [0, target_shape[i] - s] + pad - result = F.pad(tensor, pad=pad, value=pad_value) - - return result - - -def maybe_pad_values( - values: Any, pad_value: Optional[Any] = None, strategy: str = "longest" -) -> Any: - """If an iterable of values is passed and a pad value is given, pad the values to the same - length and create a tensor from them. Otherwise, return the values unchanged. - - Note that the padding is done on all dimensions. - - Args: - values: The values to pad. - pad_value: The value to use for padding. - strategy: The padding strategy. Currently only "longest" is supported. - - Returns: The padded values. 
- """ - - if pad_value is None: - return values - if not isinstance(values, Iterable): - raise TypeError(f"values must be iterable to pad them, but got {type(values)}") - if strategy != "longest": - raise ValueError(f"unknown padding strategy: {strategy}") - tensor_list = [torch.tensor(value_list) for value_list in values] - shape_lists = list(zip(*[t.shape for t in tensor_list])) - max_shape = [max(dims) for dims in shape_lists] - padded = [ - _pad_tensor(tensor=t, target_shape=max_shape, pad_value=pad_value) - for i, t in enumerate(tensor_list) - ] - return torch.stack(padded) - - -def maybe_to_tensor( - values: Iterable[Any], dtype: Optional[torch.dtype] = None, pad_value: Optional[Any] = None -) -> Any: - """If an iterable of values is passed and a dtype is given, convert the values to a tensor of - the given type. - - Args: - values: The values to convert. - dtype: A dtype to convert the values to. - pad_value: A pad value to use if the values are padded. - - Returns: A tensor or the values unchanged. - """ - - if all(v is None for v in values): - return None - if dtype is None: - return values - maybe_padded = maybe_pad_values(values=values, pad_value=pad_value) - if not isinstance(maybe_padded, torch.Tensor): - maybe_padded = torch.Tensor(maybe_padded) - tensor = maybe_padded.to(dtype=dtype) - return tensor - - -class BatchableMixin: - """A mixin class that provides a batch method to batch a list of instances of the class. All - attributes, but also property methods, are batched. The batch method returns a dictionary with - all attribute / property names as keys. The values are tensors created from the stacked values - of the attributes / properties. The tensors are padded to the length of the longest instance in - the batch and converted to the given dtype. 
- - Example: - >>> import dataclasses - >>> from typing import List, Dict - >>> import torch - >>> - >>> @dataclasses.dataclass - >>> class Foo(BatchableMixin): - >>> a: List[int] - >>> - >>> @property - >>> def len_a(self): - >>> return len(self.a) - >>> - >>> x = Foo(a=[1, 2, 3]) - >>> y = Foo(a=[4, 5]) - >>> - >>> Foo.batch(values=[x, y], dtypes={"a": torch.int64, "len_a": torch.int64}, pad_values={"a": 0}) - {'a': tensor([[1, 2, 3],[4, 5, 0]]), 'len_a': tensor([3, 2])} - """ - - @classmethod - def get_property_names(cls) -> List[str]: - return [name for name in cls.__dict__ if isinstance(getattr(cls, name), property)] - - @classmethod - def get_dataclass_field_names(cls) -> List[str]: - if dataclasses.is_dataclass(cls): - return [f.name for f in dataclasses.fields(cls)] - else: - return [] - - @classmethod - def get_attribute_names(cls) -> List[str]: - return cls.get_property_names() + cls.get_dataclass_field_names() - - @classmethod - def batch( - cls, - values: List[Any], - dtypes: Dict[str, torch.dtype], - pad_values: Dict[str, Any], - ) -> Dict[str, Any]: - attribute_names = cls.get_attribute_names() - return { - k: maybe_to_tensor( - values=[getattr(x, k) for x in values], - dtype=dtypes.get(k, None), - pad_value=pad_values.get(k, None), - ) - for k in attribute_names - # Only batch attributes that are not None for any of the values. - if not all(getattr(x, k) is None for x in values) - } - - -T = TypeVar("T") - - -def increase_counter( - key: Tuple[Any, ...], - statistics: Dict[Tuple[Any, ...], int], - value: int = 1, -): - key_s = tuple(str(k) for k in key) - statistics[key_s] += value - - -class StatisticsMixin(ABC, Generic[T]): - """A mixin class that provides methods to collect and format statistics. - - Args: - collect_statistics: Control whether statistics should be collected. - If `False`, the mixin will not show any statistics when calling - `show_statistics`. Further effects depend on the implementation - of the mixin. 
- **kwargs: Additional keyword arguments to pass to the parent class. - """ - - def __init__(self, collect_statistics: bool = False, **kwargs): - super().__init__(**kwargs) - self.collect_statistics = collect_statistics - self.reset_statistics() - - @abstractmethod - def reset_statistics(self): - """Reset the statistics collected by this mixin (state).""" - pass - - @abstractmethod - def get_statistics(self) -> T: - """Get the statistics collected by this mixin. - - This should *not* modify the state of the mixin, repeated calls should return the same - result! - """ - pass - - def format_statistics(self, statistics: T) -> str: - """Format the statistics collected by this mixin as string for display (usually on - console).""" - raise NotImplementedError( - f"format_statistics is not implemented for {self.__class__.__name__}. " - "Please implement this method to show formatted statistics." - ) - - def show_statistics(self): - if self.collect_statistics: - logger.info(f"statistics:\n{self.format_statistics(self.get_statistics())}") - - -class RelationStatisticsMixin(StatisticsMixin[Dict[Tuple[str, str], int]]): - """A mixin class that provides methods to collect and format statistics about relations. - - This mixin collects statistics about relations, such as the number of available, used, and - skipped relations. - """ - - def get_none_label_for_statistics(self) -> str: - if not hasattr(self, "_statistics_none_label"): - if hasattr(self, "none_label"): - # If the mixin has a `none_label` attribute, use it as the label for "no relation". - self._statistics_none_label = self.none_label - else: - self._statistics_none_label = "no_relation" - logger.warning( - f"{type(self).__name__} does not have a `none_label` attribute. " - "Using default value 'no_relation'. " - "`none_label` is used as the label for relations with score 0 in statistics and " - "all relations with label different from `none_label` will be summarized to 'all_relations'. 
" - "Set the `none_label` attribute before using statistics or " - "overwrite `get_none_label_for_statistics()` function to get rid of this message." - ) - - return self._statistics_none_label - - def reset_statistics(self): - self._collected_relations: Dict[str, List[Annotation]] = defaultdict(list) - - def collect_relation(self, kind: str, relation: Annotation): - if self.collect_statistics: - self._collected_relations[kind].append(relation) - - def collect_all_relations(self, kind: str, relations: Iterable[Annotation]): - if self.collect_statistics: - self._collected_relations[kind].extend(relations) - - def get_statistics(self) -> Dict[Tuple[str, str], int]: - if self.collect_statistics: - # create statistics from the collected relations - statistics: Dict[Tuple[str, str], int] = defaultdict(int) - all_relations = set(self._collected_relations["available"]) - used_relations = set(self._collected_relations["used"]) - skipped_other = all_relations - used_relations - for key, rels in self._collected_relations.items(): - rels_set = set(rels) - if key.startswith("skipped_"): - skipped_other -= rels_set - elif key.startswith("used_"): - pass - elif key in ["available", "used"]: - pass - else: - raise ValueError(f"unknown key: {key}") - for rel in rels_set: - # Set `none_label` as label when the score is zero. We encode negative relations - # in such a way in the case of multi-label or binary (similarity for coref). 
- label = rel.label if rel.score > 0 else self.get_none_label_for_statistics() - increase_counter(key=(key, label), statistics=statistics) - for rel in skipped_other: - increase_counter(key=("skipped_other", rel.label), statistics=statistics) - - return dict(statistics) - else: - return {} - - def format_statistics(self, statistics: Dict[Tuple[str, str], int]) -> str: - if len(statistics) > 0: - to_show_series = pd.Series(statistics) - # unstack index to have relation labels as column names - to_show = to_show_series.unstack() - else: - # If there were no statistics, create an empty dummy dataframe. - to_show = pd.DataFrame(pd.Series(dict())) - # fill missing values with 0 and convert back to int (unstacking may introduce NaNs which are float type) - to_show = to_show.fillna(0).astype(int) - if to_show.columns.size > 1: - to_show["all_relations"] = to_show.loc[ - :, to_show.columns != self.get_none_label_for_statistics() - ].sum(axis=1) - - # transpose - # to have the labels (which may be a lot) as index for improved readability and - # to allow to keep counts as int columns (dtypes are per-column, not per-row) - to_show = to_show.T - if "used" in to_show.columns and "available" in to_show.columns: - to_show["used %"] = (100 * to_show["used"] / to_show["available"]).round() - - return to_show.to_markdown() diff --git a/src/pie_modules/taskmodules/common/taskmodule_with_document_converter.py b/src/pie_modules/taskmodules/common/taskmodule_with_document_converter.py deleted file mode 100644 index 69f8e7668..000000000 --- a/src/pie_modules/taskmodules/common/taskmodule_with_document_converter.py +++ /dev/null @@ -1,117 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Generic, Iterable, Iterator, Optional, Sequence, Type, TypeVar, Union - -from pie_core import ( - Document, - IterableTaskEncodingDataset, - TaskEncoding, - TaskEncodingDataset, - TaskEncodingSequence, - TaskModule, -) -from typing_extensions import TypeAlias - -DocumentType = 
TypeVar("DocumentType", bound=Document) -ConvertedDocumentType = TypeVar("ConvertedDocumentType", bound=Document) -InputEncodingType = TypeVar("InputEncodingType") -TargetEncodingType = TypeVar("TargetEncodingType") -# TaskEncoding: defined below -TaskBatchEncodingType = TypeVar("TaskBatchEncodingType") -# ModelBatchEncoding: defined in models -ModelBatchOutputType = TypeVar("ModelBatchOutputType") -TaskOutputType = TypeVar("TaskOutputType") - -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] - - -class TaskModuleWithDocumentConverter( - TaskModule, - ABC, - Generic[ - ConvertedDocumentType, - DocumentType, - InputEncodingType, - TargetEncodingType, - TaskBatchEncodingType, - ModelBatchOutputType, - TaskOutputType, - ], -): - @property - def document_type(self) -> Optional[Type[Document]]: - if super().document_type is not None: - raise NotImplementedError(f"please overwrite document_type for {type(self).__name__}") - else: - return None - - @abstractmethod - def _convert_document(self, document: DocumentType) -> ConvertedDocumentType: - """Convert a document of the taskmodule document type to the expected document type of the - wrapped taskmodule. - - Args: - document: the input document - - Returns: the converted document - """ - pass - - def _prepare(self, documents: Sequence[DocumentType]) -> None: - # use an iterator for lazy processing - documents_converted = (self._convert_document(doc) for doc in documents) - super()._prepare(documents=documents_converted) - - def convert_document(self, document: DocumentType) -> ConvertedDocumentType: - converted_document = self._convert_document(document) - if "original_document" in converted_document.metadata: - raise ValueError( - f"metadata of converted_document has already and entry 'original_document', " - f"this is not allowed. Please adjust '{type(self).__name__}._convert_document()' " - f"to produce documents without that key in metadata." 
- ) - converted_document.metadata["original_document"] = document - return converted_document - - def encode(self, documents: Union[DocumentType, Iterable[DocumentType]], **kwargs) -> Union[ - Sequence[TaskEncodingType], - TaskEncodingSequence[TaskEncodingType, DocumentType], - Iterator[TaskEncodingType], - TaskEncodingDataset[TaskEncodingType], - IterableTaskEncodingDataset[TaskEncodingType], - ]: - converted_documents: Union[DocumentType, Iterable[DocumentType]] - if isinstance(documents, Document): - converted_documents = self.convert_document(documents) - else: - converted_documents = [self.convert_document(doc) for doc in documents] - return super().encode(documents=converted_documents, **kwargs) - - def decode(self, **kwargs) -> Sequence[DocumentType]: - decoded_documents = super().decode(**kwargs) - result = [] - for doc in decoded_documents: - original_document = doc.metadata["original_document"] - self._integrate_predictions_from_converted_document( - converted_document=doc, document=original_document - ) - result.append(original_document) - return result - - @abstractmethod - def _integrate_predictions_from_converted_document( - self, - document: DocumentType, - converted_document: ConvertedDocumentType, - ) -> None: - """Convert the predictions at the respective layers of the converted_document and add them - to the original document predictions. 
- - Args: - document: document to attach the converted predictions to - converted_document: the document returned by the wrapped taskmodule, including predictions - """ - pass diff --git a/src/pie_modules/taskmodules/common/utils.py b/src/pie_modules/taskmodules/common/utils.py deleted file mode 100644 index cc4daf626..000000000 --- a/src/pie_modules/taskmodules/common/utils.py +++ /dev/null @@ -1,32 +0,0 @@ -import logging -from typing import Union - -import torch - -logger = logging.getLogger(__name__) - - -def get_first_occurrence_index( - tensor: Union[torch.FloatTensor, torch.LongTensor], value: Union[float, int] -) -> torch.LongTensor: - """Returns the index of the first occurrence of `value` in each row of `tensor`. If `value` is - not found, seq_len is returned. - - Args: - tensor: the tensor of shape (bsz, seq_len) to search in - value: the value to search for - - Returns: a tensor of shape (bsz,) containing the index of the first occurrence of `value` in each row of `tensor`. - """ - - mask_value = tensor.eq(value) - # count matching positions from the end - value_counts_to_end = mask_value.flip(dims=[1]).cumsum(dim=1).flip(dims=[1]) - # at the first position stands the number of total matches - total_matches = value_counts_to_end[:, 0] - # the sum of all positions where the number of matches is equal to the total number of matches - # is the index *after* the first occurrence - result = value_counts_to_end.eq(total_matches.unsqueeze(-1)).sum(dim=1) - 1 - # set result to seq_len if no match was found - result[total_matches == 0] = tensor.size(1) - return result diff --git a/src/pie_modules/taskmodules/cross_text_binary_coref.py b/src/pie_modules/taskmodules/cross_text_binary_coref.py index cff2fa139..e69de29bb 100644 --- a/src/pie_modules/taskmodules/cross_text_binary_coref.py +++ b/src/pie_modules/taskmodules/cross_text_binary_coref.py @@ -1,292 +0,0 @@ -import copy -import logging -from typing import ( - Any, - Dict, - Iterable, - Iterator, - List, - 
Optional, - Sequence, - Tuple, - TypedDict, - TypeVar, - Union, -) - -import torch -from pie_core import Annotation, TaskEncoding, TaskModule -from pie_core.utils.dictionary import list_of_dicts2dict_of_lists -from pytorch_ie.utils.window import get_window_around_slice -from torchmetrics import MetricCollection -from torchmetrics.classification import ( - BinaryAUROC, - BinaryAveragePrecision, - BinaryF1Score, -) -from transformers import AutoTokenizer, BatchEncoding -from typing_extensions import TypeAlias - -from pie_modules.annotations import Span -from pie_modules.documents import ( - TextPairDocumentWithLabeledSpansAndBinaryCorefRelations, -) -from pie_modules.taskmodules.common.mixins import RelationStatisticsMixin -from pie_modules.taskmodules.metrics import WrappedMetricWithPrepareFunction -from pie_modules.utils.tokenization import ( - SpanNotAlignedWithTokenException, - get_aligned_token_span, -) - -logger = logging.getLogger(__name__) - -InputEncodingType: TypeAlias = Dict[str, Any] -TargetEncodingType: TypeAlias = Sequence[float] -DocumentType: TypeAlias = TextPairDocumentWithLabeledSpansAndBinaryCorefRelations - -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] - - -class TaskOutputType(TypedDict, total=False): - score: float - is_similar: bool - - -ModelInputType: TypeAlias = Dict[str, torch.Tensor] -ModelTargetType: TypeAlias = Dict[str, torch.Tensor] -ModelOutputType: TypeAlias = Dict[str, torch.Tensor] - -TaskModuleType: TypeAlias = TaskModule[ - # _InputEncoding, _TargetEncoding, _TaskBatchEncoding, _ModelBatchOutput, _TaskOutput - DocumentType, - InputEncodingType, - TargetEncodingType, - Tuple[ModelInputType, Optional[ModelTargetType]], - ModelTargetType, - TaskOutputType, -] - - -class SpanDoesNotFitIntoAvailableWindow(Exception): - def __init__(self, span): - self.span = span - - -def _get_labels(model_output: ModelTargetType, label_threshold: float) -> torch.Tensor: - return 
(model_output["scores"] > label_threshold).to(torch.int) - - -def _get_scores(model_output: ModelTargetType) -> torch.Tensor: - return model_output["scores"] - - -S = TypeVar("S", bound=Span) - - -def shift_span(span: S, offset: int) -> S: - return span.copy(start=span.start + offset, end=span.end + offset) - - -@TaskModule.register() -class CrossTextBinaryCorefTaskModule(RelationStatisticsMixin, TaskModuleType): - """This taskmodule processes documents of type - TextPairDocumentWithLabeledSpansAndBinaryCorefRelations in preparation for a - SequencePairSimilarityModelWithPooler.""" - - DOCUMENT_TYPE = DocumentType - - def __init__( - self, - tokenizer_name_or_path: str, - similarity_threshold: float = 0.9, - max_window: Optional[int] = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.save_hyperparameters() - - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) - self.similarity_threshold = similarity_threshold - self.max_window = max_window if max_window is not None else self.tokenizer.model_max_length - self.available_window = self.max_window - self.tokenizer.num_special_tokens_to_add() - self.num_special_tokens_before = len(self._get_special_tokens_before_input()) - - def _get_special_tokens_before_input(self) -> List[int]: - dummy_ids = self.tokenizer.build_inputs_with_special_tokens(token_ids_0=[-1]) - return dummy_ids[: dummy_ids.index(-1)] - - def encode(self, documents: Union[DocumentType, Iterable[DocumentType]], **kwargs): - self.reset_statistics() - result = super().encode(documents=documents, **kwargs) - self.show_statistics() - return result - - def truncate_encoding_around_span( - self, encoding: BatchEncoding, char_span: Span - ) -> Tuple[Dict[str, List[int]], Span]: - input_ids = copy.deepcopy(encoding["input_ids"]) - - token_span = get_aligned_token_span(encoding=encoding, char_span=char_span) - - # truncate input_ids and shift token_start and token_end - if len(input_ids) > self.available_window: - window_slice 
= get_window_around_slice( - slice=(token_span.start, token_span.end), - max_window_size=self.available_window, - available_input_length=len(input_ids), - ) - if window_slice is None: - raise SpanDoesNotFitIntoAvailableWindow(span=token_span) - window_start, window_end = window_slice - input_ids = input_ids[window_start:window_end] - token_span = shift_span(token_span, offset=-window_start) - - truncated_encoding = self.tokenizer.prepare_for_model(ids=input_ids) - # shift indices because we added special tokens to the input_ids - token_span = shift_span(token_span, offset=self.num_special_tokens_before) - - return truncated_encoding, token_span - - def encode_input( - self, - document: DocumentType, - is_training: bool = False, - ) -> Optional[Union[TaskEncodingType, Sequence[TaskEncodingType]]]: - self.collect_all_relations(kind="available", relations=document.binary_coref_relations) - tokenizer_kwargs = dict( - padding=False, - truncation=False, - add_special_tokens=False, - ) - encoding = self.tokenizer(text=document.text, **tokenizer_kwargs) - encoding_pair = self.tokenizer(text=document.text_pair, **tokenizer_kwargs) - - task_encodings = [] - for coref_rel in document.binary_coref_relations: - # TODO: This can miss instances if both texts are the same. We could check that - # coref_rel.head is in document.labeled_spans (same for the tail), but would this - # slow down the encoding? - if not ( - coref_rel.head.target == document.text - or coref_rel.tail.target == document.text_pair - ): - raise ValueError( - f"It is expected that coref relations go from (head) spans over 'text' " - f"to (tail) spans over 'text_pair', but this is not the case for this " - f"relation (i.e. 
it points into the other direction): {coref_rel.resolve()}" - ) - try: - current_encoding, token_span = self.truncate_encoding_around_span( - encoding=encoding, char_span=coref_rel.head - ) - current_encoding_pair, token_span_pair = self.truncate_encoding_around_span( - encoding=encoding_pair, char_span=coref_rel.tail - ) - except SpanNotAlignedWithTokenException as e: - logger.warning( - f"Could not get token offsets for argument ({e.span}) of coref relation: " - f"{coref_rel.resolve()}. Skip it." - ) - self.collect_relation(kind="skipped_args_not_aligned", relation=coref_rel) - continue - except SpanDoesNotFitIntoAvailableWindow as e: - logger.warning( - f"Argument span [{e.span}] does not fit into available token window " - f"({self.available_window}). Skip it." - ) - self.collect_relation( - kind="skipped_span_does_not_fit_into_window", relation=coref_rel - ) - continue - - task_encodings.append( - TaskEncoding( - document=document, - inputs={ - "encoding": current_encoding, - "encoding_pair": current_encoding_pair, - "pooler_start_indices": token_span.start, - "pooler_end_indices": token_span.end, - "pooler_pair_start_indices": token_span_pair.start, - "pooler_pair_end_indices": token_span_pair.end, - }, - metadata={"candidate_annotation": coref_rel}, - ) - ) - self.collect_relation("used", coref_rel) - return task_encodings - - def encode_target( - self, - task_encoding: TaskEncoding[DocumentType, InputEncodingType, TargetEncodingType], - ) -> Optional[TargetEncodingType]: - return task_encoding.metadata["candidate_annotation"].score - - def collate( - self, - task_encodings: Sequence[ - TaskEncoding[DocumentType, InputEncodingType, TargetEncodingType] - ], - ) -> Tuple[ModelInputType, Optional[ModelTargetType]]: - inputs_dict = list_of_dicts2dict_of_lists( - [task_encoding.inputs for task_encoding in task_encodings] - ) - - inputs = { - k: ( - self.tokenizer.pad(v, return_tensors="pt").data - if k in ["encoding", "encoding_pair"] - else torch.tensor(v) - ) - 
for k, v in inputs_dict.items() - } - for k, v in inputs.items(): - if k.startswith("pooler_") and k.endswith("_indices"): - inputs[k] = v.unsqueeze(-1) - - if not task_encodings[0].has_targets: - return inputs, None - targets = { - "scores": torch.tensor([task_encoding.targets for task_encoding in task_encodings]) - } - return inputs, targets - - def configure_model_metric(self, stage: str) -> MetricCollection: - return MetricCollection( - metrics={ - "continuous": WrappedMetricWithPrepareFunction( - metric=MetricCollection( - { - "auroc": BinaryAUROC(), - "avg-P": BinaryAveragePrecision(validate_args=False), - # "roc": BinaryROC(validate_args=False), - # "PRCurve": BinaryPrecisionRecallCurve(validate_args=False), - "f1": BinaryF1Score(threshold=self.similarity_threshold), - } - ), - prepare_function=_get_scores, - ), - } - ) - - def unbatch_output(self, model_output: ModelTargetType) -> Sequence[TaskOutputType]: - is_similar = (model_output["scores"] > self.similarity_threshold).detach().cpu().tolist() - scores = model_output["scores"].detach().cpu().tolist() - result: List[TaskOutputType] = [ - {"is_similar": is_sim, "score": prob} for is_sim, prob in zip(is_similar, scores) - ] - return result - - def create_annotations_from_output( - self, - task_encoding: TaskEncoding[DocumentType, InputEncodingType, TargetEncodingType], - task_output: TaskOutputType, - ) -> Iterator[Tuple[str, Annotation]]: - if task_output["is_similar"]: - score = task_output["score"] - new_coref_rel = task_encoding.metadata["candidate_annotation"].copy(score=score) - yield "binary_coref_relations", new_coref_rel diff --git a/src/pie_modules/taskmodules/extractive_question_answering.py b/src/pie_modules/taskmodules/extractive_question_answering.py deleted file mode 100644 index d774cf18b..000000000 --- a/src/pie_modules/taskmodules/extractive_question_answering.py +++ /dev/null @@ -1,239 +0,0 @@ -import dataclasses -import logging -from typing import Any, Dict, Iterator, List, Optional, 
Sequence, Tuple, Union - -import numpy as np -import torch -from pie_core import Annotation, AnnotationLayer, TaskEncoding, TaskModule -from tokenizers import Encoding -from transformers import AutoTokenizer, BatchEncoding, PreTrainedTokenizer -from transformers.modeling_outputs import QuestionAnsweringModelOutput -from typing_extensions import TypeAlias - -from pie_modules.annotations import ExtractiveAnswer, Question -from pie_modules.document.processing import tokenize_document -from pie_modules.documents import ( - TextBasedDocument, - TextDocumentWithQuestionsAndExtractiveAnswers, - TokenDocumentWithQuestionsAndExtractiveAnswers, -) - -logger = logging.getLogger(__name__) - - -DocumentType: TypeAlias = TextBasedDocument -InputEncoding: TypeAlias = Union[Dict[str, Any], BatchEncoding] - - -@dataclasses.dataclass -class TargetEncoding: - start_position: int - end_position: int - - -TaskEncodingType: TypeAlias = TaskEncoding[ - TextDocumentWithQuestionsAndExtractiveAnswers, - InputEncoding, - TargetEncoding, -] - -TaskBatchEncoding: TypeAlias = Tuple[BatchEncoding, Optional[Dict[str, Any]]] -ModelBatchOutput: TypeAlias = QuestionAnsweringModelOutput - - -@dataclasses.dataclass -class TaskOutput: - start: int - end: int - start_probability: float - end_probability: float - - -@TaskModule.register() -class ExtractiveQuestionAnsweringTaskModule(TaskModule): - """PIE task module for extractive question answering. - - This task module expects that the document is text based and contains an annotation layer for answers - and one for questions. - - The task module will create a task encoding for each question-answer pair. - The input encoding will be the tokenized document with the question as the second sequence. - The target encoding will be the start and end position of the answer in the context. - The task module will create a dummy target encoding where both start and end index are set to 0 (usually - the CLS token position), if there is no answer for the question. 
- - Args: - tokenizer_name_or_path: The name (Huggingface Hub identifier) or local path to a config of the tokenizer to use. - max_length: The maximum length of the input sequence in means of tokens. - answer_annotation: The name of the annotation layer for answers. Defaults to "answers". - question_annotation: The name of the annotation layer for questions. Defaults to "questions". - tokenize_kwargs: Additional keyword arguments for the tokenizer. Defaults to None. - """ - - DOCUMENT_TYPE = TextDocumentWithQuestionsAndExtractiveAnswers - - def __init__( - self, - tokenizer_name_or_path: str, - max_length: int, - answer_annotation: str = "answers", - question_annotation: str = "questions", - tokenize_kwargs: Optional[Dict[str, Any]] = None, - **kwargs, - ): - super().__init__(**kwargs) - self.save_hyperparameters() - - self.answer_annotation = answer_annotation - self.question_annotation = question_annotation - self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) - self.max_length = max_length - self.tokenize_kwargs = tokenize_kwargs or {} - - def get_answer_layer(self, document: DocumentType) -> AnnotationLayer[ExtractiveAnswer]: - # we expect that each document have an annotation layer for answers - # where each entry is of type ExtractiveAnswer - return document[self.answer_annotation] - - def get_question_layer(self, document: DocumentType) -> AnnotationLayer[Question]: - answers = self.get_answer_layer(document) - # we expect that the answers annotation layer targets the questions annotation layer - # where each entry is of type Question - return answers.target_layers[self.question_annotation] - - def get_context(self, document: DocumentType) -> str: - answers = self.get_answer_layer(document) - # we expect that the answers annotation layer targets the text field - # which is a simple string - return answers.targets["text"] - - def encode_input( - self, - document: DocumentType, - is_training: bool = False, - ) -> 
Optional[ - Union[ - TaskEncoding[DocumentType, InputEncoding, TargetEncoding], - Sequence[TaskEncoding[DocumentType, InputEncoding, TargetEncoding]], - ] - ]: - questions = self.get_question_layer(document) - task_encodings: List[TaskEncodingType] = [] - for question in questions: - tokenized_docs = tokenize_document( - document, - tokenizer=self.tokenizer, - text=question.text.strip(), - truncation="only_second", - max_length=self.max_length, - return_overflowing_tokens=True, - result_document_type=TokenDocumentWithQuestionsAndExtractiveAnswers, - strict_span_conversion=False, - verbose=False, - **self.tokenize_kwargs, - ) - for doc in tokenized_docs: - inputs = self.tokenizer.convert_tokens_to_ids(list(doc.tokens)) - task_encodings.append( - TaskEncodingType( - document=document, - inputs=inputs, - metadata=dict(question=question, tokenized_document=doc), - ) - ) - return task_encodings - - def encode_target( - self, - task_encoding: TaskEncodingType, - ) -> Optional[TargetEncoding]: - all_answers = self.get_answer_layer(task_encoding.metadata["tokenized_document"]) - # the document can contain multiple questions, so we filter the answers by the target question - answers = [ - answer - for answer in all_answers - if answer.question == task_encoding.metadata["question"] - ] - # if there is no answer for the target question, we return a dummy target encoding - if len(answers) == 0: - return TargetEncoding(0, 0) - if len(answers) > 1: - logger.warning( - f"The answers annotation layer is expected to have not more than one answer per question, " - f"but it has {len(answers)} answers. We take just the first one." 
- ) - answer = answers[0] - return TargetEncoding(answer.start, answer.end - 1) - - def collate( - self, task_encodings: Sequence[TaskEncoding[DocumentType, InputEncoding, TargetEncoding]] - ) -> TaskBatchEncoding: - def task_encoding2input_features(task_encoding: TaskEncodingType) -> Dict[str, Any]: - encoding = task_encoding.metadata["tokenized_document"].metadata["tokenizer_encoding"] - return {"input_ids": encoding.ids, "token_type_ids": encoding.type_ids} - - input_features = [ - task_encoding2input_features(task_encoding) for task_encoding in task_encodings - ] - - # will contain: input_ids, token_type_ids, attention_mask - inputs: BatchEncoding = self.tokenizer.pad( - input_features, padding="longest", max_length=self.max_length, return_tensors="pt" - ) - - if not task_encodings[0].has_targets: - return inputs, None - - start_positions = torch.tensor( - [task_encoding.targets.start_position for task_encoding in task_encodings], - dtype=torch.int64, - ) - end_positions = torch.tensor( - [task_encoding.targets.end_position for task_encoding in task_encodings], - dtype=torch.int64, - ) - targets = {"start_positions": start_positions, "end_positions": end_positions} - - return inputs, targets - - def unbatch_output(self, model_output: ModelBatchOutput) -> Sequence[TaskOutput]: - batch_size = len(model_output.start_logits) - start_probs = torch.softmax(model_output.start_logits, dim=-1).detach().cpu().numpy() - end_probs = torch.softmax(model_output.end_logits, dim=-1).detach().cpu().numpy() - best_start = np.argmax(start_probs, axis=1) - best_end = np.argmax(end_probs, axis=1) - return [ - TaskOutput( - start=best_start[i], - end=best_end[i], - start_probability=start_probs[i, best_start[i]], - end_probability=end_probs[i, best_end[i]], - ) - for i in range(batch_size) - ] - - def create_annotations_from_output( - self, - task_encoding: TaskEncoding[DocumentType, InputEncoding, TargetEncoding], - task_output: TaskOutput, - ) -> Iterator[Tuple[str, Annotation]]: 
- tokenizer_encoding: Encoding = task_encoding.metadata["tokenized_document"].metadata[ - "tokenizer_encoding" - ] - start_chars = tokenizer_encoding.token_to_chars(task_output.start) - end_chars = tokenizer_encoding.token_to_chars(task_output.end) - if start_chars is not None and end_chars is not None: - start_sequence_index = tokenizer_encoding.token_to_sequence(task_output.start) - end_sequence_index = tokenizer_encoding.token_to_sequence(task_output.end) - # the indices need to point into the context which is the second sequence - if start_sequence_index == 1 and end_sequence_index == 1: - start_char = start_chars[0] - end_char = end_chars[-1] - context = self.get_context(task_encoding.document) - if 0 <= start_char < end_char <= len(context): - yield self.answer_annotation, ExtractiveAnswer( - start=start_char, - end=end_char, - question=task_encoding.metadata["question"], - score=float(task_output.start_probability * task_output.end_probability), - ) diff --git a/src/pie_modules/taskmodules/labeled_span_extraction_by_token_classification.py b/src/pie_modules/taskmodules/labeled_span_extraction_by_token_classification.py deleted file mode 100644 index ca566a024..000000000 --- a/src/pie_modules/taskmodules/labeled_span_extraction_by_token_classification.py +++ /dev/null @@ -1,468 +0,0 @@ -""" -workflow: - Document - -> (InputEncoding, TargetEncoding) -> TaskEncoding - -> ModelStepInputType -> ModelBatchOutput - -> TaskOutput - -> Document -""" - -import logging -from functools import partial -from typing import ( - Any, - Dict, - Iterator, - List, - Optional, - Sequence, - Set, - Tuple, - Type, - TypedDict, - Union, -) - -import torch -from pie_core import AnnotationLayer, TaskEncoding, TaskModule -from pie_core.utils.dictionary import list_of_dicts2dict_of_lists -from tokenizers import Encoding -from torchmetrics import F1Score, Metric, MetricCollection, Precision, Recall -from transformers import AutoTokenizer -from typing_extensions import TypeAlias - -from 
pie_modules.annotations import LabeledSpan -from pie_modules.document.processing import ( - token_based_document_to_text_based, - tokenize_document, -) -from pie_modules.documents import ( - TextBasedDocument, - TextDocumentWithLabeledSpans, - TextDocumentWithLabeledSpansAndLabeledPartitions, - TokenDocumentWithLabeledSpans, - TokenDocumentWithLabeledSpansAndLabeledPartitions, -) -from pie_modules.models.simple_token_classification import InputType as ModelInputType -from pie_modules.models.simple_token_classification import TargetType as ModelTargetType -from pie_modules.taskmodules.metrics import ( - PrecisionRecallAndF1ForLabeledAnnotations, - WrappedMetricWithPrepareFunction, -) -from pie_modules.utils.sequence_tagging import tag_sequence_to_token_spans - -DocumentType: TypeAlias = TextBasedDocument - -InputEncodingType: TypeAlias = Encoding -TargetEncodingType: TypeAlias = Sequence[int] -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] -ModelStepInputType: TypeAlias = Tuple[ - ModelInputType, - Optional[ModelTargetType], -] -ModelOutputType: TypeAlias = ModelTargetType - - -class TaskOutputType(TypedDict, total=False): - labels: torch.LongTensor - probabilities: torch.FloatTensor - - -TaskModuleType: TypeAlias = TaskModule[ - DocumentType, - InputEncodingType, - TargetEncodingType, - ModelStepInputType, - ModelOutputType, - TaskOutputType, -] - -logger = logging.getLogger(__name__) - - -def _get_label_ids_from_model_output( - model_output: ModelTargetType, -) -> torch.LongTensor: - return model_output["labels"] - - -def unbatch_and_decode_annotations( - model_output: ModelOutputType, - taskmodule: "LabeledSpanExtractionByTokenClassificationTaskModule", -) -> List[Sequence[LabeledSpan]]: - task_outputs = taskmodule.unbatch_output(model_output) - annotations = [ - taskmodule.decode_annotations(task_output)["labeled_spans"] for task_output in task_outputs - ] - return annotations - - -@TaskModule.register() 
-class LabeledSpanExtractionByTokenClassificationTaskModule(TaskModuleType): - """Taskmodule for span prediction (e.g. NER) as token classification. - - This taskmodule expects the input documents to be of TextBasedDocument with an annotation layer of - labeled spans (e.g. TextDocumentWithLabeledSpans). The text is tokenized using the provided tokenizer and - the labels are converted to BIO tags. - - To handle long documents, the text can be windowed using the respective parameters for the tokenizer, - i.e. max_length (and stride). Note, that this requires to set return_overflowing_tokens=True, otherwise just - the first window of input tokens is considered. The windowing is done in a way that the spans are not split - across windows. If a span is split across windows, it is ignored during training and evaluation. Thus, if you - have long spans in your data, it is recommended to set a stride that is as large as the average span length - to avoid missing many spans. - - If a partition annotation is provided, the taskmodule expects the input documents to be of - TextBasedDocument with two annotation layers of labeled spans, one for the spans and one for the partitions - (e.g. TextDocumentWithLabeledSpansAndLabeledPartitions). Then, the text is tokenized and fed to the model - individually per partition (e.g. per sentence). This is useful for long documents that can not be processed - by the model as a whole, but where a natural partitioning exists (e.g. sentences or paragraphs) and, thus, - windowing is not necessary (or a combination of both can be used). - - If labels are not provided, they are collected from the data during the prepare() step. If provided, they act as - whitelist, i.e. spans with labels that are not in the labels are ignored during training and evaluation. - - Args: - tokenizer_name_or_path: Name or path of the HuggingFace tokenizer to use. - span_annotation: Name of the annotation layer that contains the labeled spans. Default: "labeled_spans". 
- partition_annotation: Name of the annotation layer that contains the labeled partitions. If provided, the - text is tokenized individually per partition. Default: None. - label_pad_id: ID of the padding tag label. The model should ignore this for training. Default: -100. - labels: List of labels to use. If not provided, the labels are collected from the labeled span annotations - in the data during the prepare() step. Default: None. - include_ill_formed_predictions: Whether to include ill-formed predictions in the output. If False, the - predictions are corrected to be well-formed. Default: True. - tokenize_kwargs: Keyword arguments to pass to the tokenizer during tokenization. Default: None. - pad_kwargs: Keyword arguments to pass to the tokenizer during padding. Note, that this is used to pad the - token ids *and* the tag ids, if available (i.e. during training or evaluation). Default: None. - combine_token_scores_method: Method to combine the token scores to a span score. Options are "mean", "max", - "min", and "product". Default: "mean". - log_precision_recall_metrics: Whether to log precision and recall metrics (in addition to F1) for the - spans. Default: True. 
- """ - - # list of attribute names that need to be set by _prepare() - PREPARED_ATTRIBUTES: List[str] = ["labels"] - - def __init__( - self, - tokenizer_name_or_path: str, - span_annotation: str = "labeled_spans", - partition_annotation: Optional[str] = None, - label_pad_id: int = -100, - labels: Optional[List[str]] = None, - include_ill_formed_predictions: bool = True, - tokenize_kwargs: Optional[Dict[str, Any]] = None, - pad_kwargs: Optional[Dict[str, Any]] = None, - combine_token_scores_method: str = "mean", - log_precision_recall_metrics: bool = True, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.save_hyperparameters() - - self.span_annotation = span_annotation - self.partition_annotation = partition_annotation - self.labels = labels - self.label_pad_id = label_pad_id - self.include_ill_formed_predictions = include_ill_formed_predictions - self.tokenize_kwargs = tokenize_kwargs or {} - self.pad_kwargs = pad_kwargs or {} - self.log_precision_recall_metrics = log_precision_recall_metrics - self.combine_token_scores_method = combine_token_scores_method - - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) - - @property - def document_type(self) -> Optional[Type[TextBasedDocument]]: - dt: Type[TextBasedDocument] - errors = [] - if self.span_annotation != "labeled_spans": - errors.append( - f"span_annotation={self.span_annotation} is not the default value ('labeled_spans')" - ) - if self.partition_annotation is None: - dt = TextDocumentWithLabeledSpans - else: - if self.partition_annotation != "labeled_partitions": - errors.append( - f"partition_annotation={self.partition_annotation} is not the default value " - f"('labeled_partitions')" - ) - dt = TextDocumentWithLabeledSpansAndLabeledPartitions - - if len(errors) == 0: - return dt - else: - logger.warning( - f"{' and '.join(errors)}, so the taskmodule {type(self).__name__} can not request " - f"the usual document type ({dt.__name__}) for auto-conversion because this has the 
bespoken default " - f"value as layer name(s) instead of the provided one(s)." - ) - return None - - def get_span_layer(self, document: DocumentType) -> AnnotationLayer[LabeledSpan]: - return document[self.span_annotation] - - def _prepare(self, documents: Sequence[DocumentType]) -> None: - # collect all possible labels - labels: Set[str] = set() - for document in documents: - spans: AnnotationLayer[LabeledSpan] = self.get_span_layer(document) - - for span in spans: - labels.add(span.label) - - self.labels = sorted(labels) - logger.info(f"Collected {len(self.labels)} labels from the data: {self.labels}") - - def _post_prepare(self): - # create the real token labels (BIO scheme) from the labels - self.label_to_id = {"O": 0} - current_id = 1 - for label in sorted(self.labels): - for prefix in ["B", "I"]: - self.label_to_id[f"{prefix}-{label}"] = current_id - current_id += 1 - - self.id_to_label = {v: k for k, v in self.label_to_id.items()} - - def encode_input( - self, - document: TextBasedDocument, - ) -> Optional[Union[TaskEncodingType, Sequence[TaskEncodingType]]]: - if self.partition_annotation is None: - tokenized_document_type = TokenDocumentWithLabeledSpans - casted_document_type = TextDocumentWithLabeledSpans - field_mapping = {self.span_annotation: "labeled_spans"} - else: - tokenized_document_type = TokenDocumentWithLabeledSpansAndLabeledPartitions - casted_document_type = TextDocumentWithLabeledSpansAndLabeledPartitions - field_mapping = { - self.span_annotation: "labeled_spans", - self.partition_annotation: "labeled_partitions", - } - casted_document = document.as_type(casted_document_type, field_mapping=field_mapping) - tokenized_docs = tokenize_document( - casted_document, - tokenizer=self.tokenizer, - result_document_type=tokenized_document_type, - partition_layer=( - "labeled_partitions" if self.partition_annotation is not None else None - ), - strict_span_conversion=False, - **self.tokenize_kwargs, - ) - - task_encodings: List[TaskEncodingType] = [] 
- for tokenized_doc in tokenized_docs: - task_encodings.append( - TaskEncoding( - document=document, - inputs=tokenized_doc.metadata["tokenizer_encoding"], - metadata={"tokenized_document": tokenized_doc}, - ) - ) - - return task_encodings - - def encode_target( - self, - task_encoding: TaskEncodingType, - ) -> Optional[TargetEncodingType]: - metadata = task_encoding.metadata - tokenized_document = metadata["tokenized_document"] - tokenizer_encoding: Encoding = tokenized_document.metadata["tokenizer_encoding"] - - tag_sequence = [ - None if tokenizer_encoding.special_tokens_mask[j] else "O" - for j in range(len(tokenizer_encoding.ids)) - ] - if self.labels is None: - raise ValueError( - "'labels' must be set before calling encode_target(). Was prepare() called on the taskmodule?" - ) - sorted_spans = sorted(tokenized_document.labeled_spans, key=lambda s: (s.start, s.end)) - for span in sorted_spans: - if span.label not in self.labels: - continue - start = span.start - end = span.end - if any(tag != "O" for tag in tag_sequence[start:end]): - logger.warning(f"tag already assigned (current span has an overlap: {span}).") - continue - - tag_sequence[start] = f"B-{span.label}" - for j in range(start + 1, end): - tag_sequence[j] = f"I-{span.label}" - - targets = [ - self.label_to_id[tag] if tag is not None else self.label_pad_id for tag in tag_sequence - ] - - return targets - - def collate(self, task_encodings: Sequence[TaskEncodingType]) -> ModelStepInputType: - input_encodings = [ - { - "input_ids": task_encoding.inputs.ids, - "attention_mask": task_encoding.inputs.attention_mask, - "special_tokens_mask": task_encoding.inputs.special_tokens_mask, - } - for task_encoding in task_encodings - ] - inputs = self.tokenizer.pad( - list_of_dicts2dict_of_lists(input_encodings), return_tensors="pt", **self.pad_kwargs - ) - - if not task_encodings[0].has_targets: - return inputs, None - - tag_ids = [task_encoding.targets for task_encoding in task_encodings] - targets = 
self.tokenizer.pad( - {"input_ids": tag_ids}, return_tensors="pt", **self.pad_kwargs - )["input_ids"] - - # set the padding label to the label_pad_token_id - pad_mask = inputs["input_ids"] == self.tokenizer.pad_token_id - targets[pad_mask] = self.label_pad_id - - return inputs, {"labels": targets} - - def unbatch_output(self, model_output: ModelOutputType) -> Sequence[TaskOutputType]: - labels = model_output["labels"] - probabilities = model_output.get("probabilities", None) - batch_size = labels.shape[0] - task_outputs: List[TaskOutputType] = [] - for batch_idx in range(batch_size): - task_output: TaskOutputType = {"labels": labels[batch_idx]} - if probabilities is not None: - task_output["probabilities"] = probabilities[batch_idx] - task_outputs.append(task_output) - return task_outputs - - def decode_annotations(self, encoding: TaskOutputType) -> Dict[str, Sequence[LabeledSpan]]: - labels = encoding["labels"] - tag_sequence = [ - "O" if tag_id == self.label_pad_id else self.id_to_label[tag_id] - for tag_id in labels.tolist() - ] - labeled_spans: List[LabeledSpan] = [] - for label, (start, end_inclusive) in tag_sequence_to_token_spans( - tag_sequence, - coding_scheme="IOB2", - include_ill_formed=self.include_ill_formed_predictions, - ): - end = end_inclusive + 1 - # do not set the score if the probabilities are not available - annotation_kwargs = {} - if encoding.get("probabilities") is not None: - span_probabilities = encoding["probabilities"][start:end] - span_label_ids = labels[start:end] - # get the probabilities at the label indices - span_label_probs = torch.stack( - [span_probabilities[i, l] for i, l in enumerate(span_label_ids)] - ) - if self.combine_token_scores_method == "mean": - # use mean probability of the span as score - annotation_kwargs["score"] = span_label_probs.mean().item() - elif self.combine_token_scores_method == "max": - # use max probability of the span as score - annotation_kwargs["score"] = span_label_probs.max().item() - elif 
self.combine_token_scores_method == "min": - # use min probability of the span as score - annotation_kwargs["score"] = span_label_probs.min().item() - elif self.combine_token_scores_method == "product": - # use product of probabilities of the span as score - annotation_kwargs["score"] = span_label_probs.prod().item() - else: - raise ValueError( - f"combine_token_scores_method={self.combine_token_scores_method} is not supported." - ) - labeled_span = LabeledSpan(label=label, start=start, end=end, **annotation_kwargs) - labeled_spans.append(labeled_span) - return {"labeled_spans": labeled_spans} - - def create_annotations_from_output( - self, - task_encoding: TaskEncodingType, - task_output: TaskOutputType, - ) -> Iterator[Tuple[str, LabeledSpan]]: - tokenized_document = task_encoding.metadata["tokenized_document"] - decoded_annotations = self.decode_annotations(task_output) - - # Note: token_based_document_to_text_based() does not yet consider predictions, so we need to clear - # the main annotations and attach the predictions to that - for layer_name, annotations in decoded_annotations.items(): - tokenized_document[layer_name].clear() - for annotation in annotations: - tokenized_document[layer_name].append(annotation) - - # we can not use self.document_type here because that may be None if self.span_annotation or - # self.partition_annotation is not the default value - document_type = ( - TextDocumentWithLabeledSpansAndLabeledPartitions - if self.partition_annotation - else TextDocumentWithLabeledSpans - ) - untokenized_document: Union[ - TextDocumentWithLabeledSpans, TextDocumentWithLabeledSpansAndLabeledPartitions - ] = token_based_document_to_text_based( - tokenized_document, result_document_type=document_type - ) - - for span in untokenized_document.labeled_spans: - # need to copy the span because it can be attached to only one document - yield self.span_annotation, span.copy() - - def configure_model_metric(self, stage: str) -> Union[Metric, MetricCollection]: 
- common_metric_kwargs = { - "num_classes": len(self.label_to_id), - "task": "multiclass", - "ignore_index": self.label_pad_id, - } - token_scores = MetricCollection( - { - "token/macro/f1": WrappedMetricWithPrepareFunction( - metric=F1Score(average="macro", **common_metric_kwargs), - prepare_function=_get_label_ids_from_model_output, - ), - "token/micro/f1": WrappedMetricWithPrepareFunction( - metric=F1Score(average="micro", **common_metric_kwargs), - prepare_function=_get_label_ids_from_model_output, - ), - "token/macro/precision": WrappedMetricWithPrepareFunction( - metric=Precision(average="macro", **common_metric_kwargs), - prepare_function=_get_label_ids_from_model_output, - ), - "token/macro/recall": WrappedMetricWithPrepareFunction( - metric=Recall(average="macro", **common_metric_kwargs), - prepare_function=_get_label_ids_from_model_output, - ), - "token/micro/precision": WrappedMetricWithPrepareFunction( - metric=Precision(average="micro", **common_metric_kwargs), - prepare_function=_get_label_ids_from_model_output, - ), - "token/micro/recall": WrappedMetricWithPrepareFunction( - metric=Recall(average="micro", **common_metric_kwargs), - prepare_function=_get_label_ids_from_model_output, - ), - } - ) - - span_scores = PrecisionRecallAndF1ForLabeledAnnotations( - flatten_result_with_sep="/", - prefix="span/", - return_recall_and_precision=self.log_precision_recall_metrics, - ) - span_scores_wrapped = WrappedMetricWithPrepareFunction( - metric=span_scores, - prepare_function=partial(unbatch_and_decode_annotations, taskmodule=self), - prepare_does_unbatch=True, - ) - - return MetricCollection([token_scores, span_scores_wrapped]) diff --git a/src/pie_modules/taskmodules/metrics/__init__.py b/src/pie_modules/taskmodules/metrics/__init__.py deleted file mode 100644 index 8d23c8efb..000000000 --- a/src/pie_modules/taskmodules/metrics/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .precision_recall_and_f1_for_labeled_annotations import ( - 
PrecisionRecallAndF1ForLabeledAnnotations, -) -from .wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function import ( - WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction, -) -from .wrapped_metric_with_prepare_function import WrappedMetricWithPrepareFunction diff --git a/src/pie_modules/taskmodules/metrics/common.py b/src/pie_modules/taskmodules/metrics/common.py deleted file mode 100644 index 18c8361eb..000000000 --- a/src/pie_modules/taskmodules/metrics/common.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from abc import ABC -from typing import Dict, Optional - -import torch -from torch import LongTensor, Tensor -from torchmetrics import Metric - -logger = logging.getLogger(__name__) - - -class MetricWithArbitraryCounts(Metric, ABC): - """A metric that hold counts for arbitrary keys.""" - - def inc_counts(self, counts: LongTensor, key: Optional[str], prefix: str = "counts_"): - full_key = prefix - if key is not None: - full_key += key - - if not hasattr(self, full_key): - self.add_state(full_key, default=torch.zeros_like(counts), dist_reduce_fx="sum") - - prev_value = getattr(self, full_key) - setattr(self, full_key, prev_value + counts) - - def get_counts(self, key_prefix: str = "counts_") -> Dict[Optional[str], LongTensor]: - result = {} - for k, v in self.metric_state.items(): - if k.startswith(key_prefix): - if not isinstance(v, Tensor): - raise ValueError( - f"Expected metric state for key {k} to be a LongTensor, but got {type(v)}." 
- ) - if not isinstance(v, LongTensor): - v = v.long() - key = k[len(key_prefix) :] or None - result[key] = v - return result diff --git a/src/pie_modules/taskmodules/metrics/precision_recall_and_f1_for_labeled_annotations.py b/src/pie_modules/taskmodules/metrics/precision_recall_and_f1_for_labeled_annotations.py deleted file mode 100644 index 2bb83eb4c..000000000 --- a/src/pie_modules/taskmodules/metrics/precision_recall_and_f1_for_labeled_annotations.py +++ /dev/null @@ -1,137 +0,0 @@ -import logging -from collections import Counter -from typing import Any, Collection, Dict, Iterable, Optional, Union - -import torch -from pie_core import Annotation -from pie_core.utils.dictionary import flatten_dict_s -from torch import LongTensor - -from .common import MetricWithArbitraryCounts - -logger = logging.getLogger(__name__) - - -class PrecisionRecallAndF1ForLabeledAnnotations(MetricWithArbitraryCounts): - """Computes precision, recall and F1 for labeled annotations. Inputs and targets are lists of - annotations. True positives are counted as the number of annotations that are the same in both - inputs and targets calculated as exact matches via set operation, false positives and false - negatives accordingly. The annotations are deduplicated for each instance. But if the same - annotation occurs in different instances, it is counted as two separate annotations. - - Args: - label_mapping: A dictionary mapping annotation labels to human-readable labels. If None, - the annotation labels are used as they are. Can be used to map label ids to string labels. - key_micro: The key to use for the micro-average in the metric result dictionary. - in_percent: Whether to return the results in percent, i.e. values between 0 and 100 instead of - between 0 and 1. - flatten_result_with_sep: If not None, the result dictionary is flattened and the keys of the - different nesting levels are concatenated with the given separator. 
- prefix: If not None, the most outer keys of the result dictionary are prefixed with this string. - return_recall_and_precision: Whether to return recall and precision in addition to F1. - """ - - def __init__( - self, - label_mapping: Optional[Dict[Any, str]] = None, - key_micro: Optional[str] = "micro", - key_macro: Optional[str] = "macro", - in_percent: bool = False, - flatten_result_with_sep: Optional[str] = None, - prefix: Optional[str] = None, - return_recall_and_precision: bool = True, - ): - super().__init__() - self.label_mapping = label_mapping - self.key_micro = key_micro - self.key_macro = key_macro - self.in_percent = in_percent - self.flatten_result_with_sep = flatten_result_with_sep - self.prefix = prefix - self.return_recall_and_precision = return_recall_and_precision - - def update(self, gold: Iterable[Annotation], predicted: Iterable[Annotation]) -> None: - # remove duplicates within each list - gold_set = set(gold) - predicted_set = set(predicted) - new_counts = self.calculate_counts(gold_set, predicted_set, gold_set & predicted_set) - for k, v in new_counts.items(): - self.inc_counts(counts=v, key=k) - - def get_precision_recall_f1(self, n_gold_predicted_correct: LongTensor) -> Dict[str, float]: - n_gold = n_gold_predicted_correct[0] - n_predicted = n_gold_predicted_correct[1] - n_correct = n_gold_predicted_correct[2] - zero = torch.tensor(0.0).to(self.device) - recall = zero if n_gold == 0 else (n_correct / n_gold) - precision = zero if n_predicted == 0 else (n_correct / n_predicted) - f1 = zero if recall + precision == 0 else (2 * precision * recall) / (precision + recall) - - result = {"f1": f1} - if self.return_recall_and_precision: - result["recall"] = recall - result["precision"] = precision - - if self.in_percent: - result = {k: v * 100 for k, v in result.items()} - return result - - def get_label(self, annotation: Annotation) -> Optional[str]: - label: Optional[str] = getattr(annotation, "label", None) - if self.label_mapping is not 
None: - return self.label_mapping[label] - return label - - def calculate_counts( - self, - gold: Collection[Annotation], - predicted: Collection[Annotation], - correct: Collection[Annotation], - ) -> Dict[Optional[str], LongTensor]: - result = {} - # per class - gold_counter = Counter([self.get_label(ann) for ann in gold]) - predicted_counter = Counter([self.get_label(ann) for ann in predicted]) - correct_counter = Counter([self.get_label(ann) for ann in correct]) - for label in gold_counter.keys() | predicted_counter.keys(): - if self.key_micro is not None and label == self.key_micro: - raise ValueError( - f"The key '{self.key_micro}' was used as an annotation label, but it is reserved for " - f"the micro average. You can change which key is used for that with the 'key_micro' argument." - ) - result[label] = torch.tensor( - [ - gold_counter.get(label, 0), - predicted_counter.get(label, 0), - correct_counter.get(label, 0), - ] - ).to(device=self.device) - - # overall - if self.key_micro is not None: - result[self.key_micro] = torch.tensor([len(gold), len(predicted), len(correct)]).to( - device=self.device - ) - return result - - def compute(self) -> Union[Dict[str, Any], Dict[Optional[str], dict[str, float]]]: - counts = self.get_counts() - result = {label: self.get_precision_recall_f1(counts[label]) for label in counts.keys()} - if self.key_macro is not None: - result_without_micro = { - k: v for k, v in result.items() if self.key_micro is None or k != self.key_micro - } - if len(result_without_micro) > 0: - sub_keys = list(result_without_micro.values())[0].keys() - result[self.key_macro] = { - k: torch.stack([v[k] for v in result_without_micro.values()]).mean() - for k in sub_keys - } - - if self.prefix is not None: - result = {f"{self.prefix}{k}": v for k, v in result.items()} - - if self.flatten_result_with_sep is not None: - return flatten_dict_s(result, sep=self.flatten_result_with_sep) - else: - return result diff --git 
a/src/pie_modules/taskmodules/metrics/wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py b/src/pie_modules/taskmodules/metrics/wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py deleted file mode 100644 index c7c45aaea..000000000 --- a/src/pie_modules/taskmodules/metrics/wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py +++ /dev/null @@ -1,147 +0,0 @@ -import logging -from typing import Any, Callable, Dict, Generic, Optional, Sequence, Tuple, TypeVar - -import torch -from torch.nn import ModuleDict -from torchmetrics import Metric - -from .common import MetricWithArbitraryCounts - -logger = logging.getLogger(__name__) -T = TypeVar("T") -U = TypeVar("U") - - -class WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction( - MetricWithArbitraryCounts, Generic[T, U] -): - """A wrapper around annotation layer metrics that can be used with batched encoded annotations. - - Args: - layer_metrics: A dictionary mapping layer names to annotation layer metrics. Each metric - should be a subclass of torchmetrics.Metric and should take two sets of annotations as - input. - unbatch_function: A function that takes a batched input and returns an iterable of - individual inputs. This is used to unbatch the input before passing it to the annotation - decoding function (decode_annotations_with_errors_function). - decode_layers_with_errors_function: A function that takes an annotation encoding and - returns a tuple of two dictionaries. The first dictionary maps layer names to a list of - annotations. The second dictionary maps error names to the number of errors that were - encountered while decoding the annotations. - round_precision: The number of digits to round the results to. If None, no rounding is - performed. 
- error_key_correct: The key in the error dictionary whose value should be the number of *correctly* - decoded annotations, so that the sum of all values in the error dictionary can be used to - normalize the error counts. If None, the total number of training examples is used to - normalize the error counts. - collect_exact_encoding_matches: Whether to collect the number of examples where the full target encoding - was predicted correctly (exact matches). - """ - - def __init__( - self, - layer_metrics: Dict[str, Metric], - unbatch_function: Callable[[T], Sequence[U]], - decode_layers_with_errors_function: Callable[[U], Tuple[Dict[str, Any], Dict[str, int]]], - round_precision: Optional[int] = 4, - error_key_correct: Optional[str] = None, - collect_exact_encoding_matches: bool = True, - ): - super().__init__() - - self.key_error_correct = error_key_correct - self.collect_exact_encoding_matches = collect_exact_encoding_matches - self.round_precision = round_precision - self.unbatch_function = unbatch_function - self.decode_layers_with_errors_function = decode_layers_with_errors_function - self.layer_metrics = ModuleDict(layer_metrics) - - # total number of encodings - self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum") - # this contains the number of examples where the full target sequence was predicted correctly (exact matches) - self.add_state("exact_encoding_matches", default=torch.tensor(0), dist_reduce_fx="sum") - # note: the error counts are stored via the MetricWithArbitraryCounts base class - - def update(self, prediction, expected): - prediction_list = self.unbatch_function(prediction) - expected_list = self.unbatch_function(expected) - if len(prediction_list) != len(expected_list): - raise ValueError( - f"Number of predictions ({len(prediction_list)}) and targets ({len(expected_list)}) do not match." 
- ) - - for expected_encoding, prediction_encoding in zip(expected_list, prediction_list): - expected_layers, _ = self.decode_layers_with_errors_function(expected_encoding) - predicted_layers, predicted_errors = self.decode_layers_with_errors_function( - prediction_encoding - ) - for k, v in predicted_errors.items(): - self.inc_counts(counts=torch.tensor(v).to(self.device), key=k, prefix="errors_") - - for layer_name, metric in self.layer_metrics.items(): - metric.update(expected_layers[layer_name], predicted_layers[layer_name]) - - if self.collect_exact_encoding_matches: - if isinstance(expected_encoding, torch.Tensor) and isinstance( - prediction_encoding, torch.Tensor - ): - is_match = torch.equal(expected_encoding, prediction_encoding) - else: - is_match = expected_encoding == prediction_encoding - if is_match: - self.exact_encoding_matches += 1 - - self.total += 1 - - def reset(self): - super().reset() - - for metric in self.layer_metrics.values(): - metric.reset() - - def _nested_round(self, d: Dict[str, Any]) -> Dict[str, Any]: - if self.round_precision is None: - return d - res: Dict[str, Any] = {} - for k, v in d.items(): - if isinstance(v, dict): - res[k] = self._nested_round(v) - elif isinstance(v, float): - res[k] = round(v, self.round_precision) - else: - res[k] = v - return res - - def compute(self): - res = {} - if self.collect_exact_encoding_matches: - res["exact_encoding_matches"] = ( - self.exact_encoding_matches / self.total if self.total > 0 else 0.0 - ) - - errors = self.get_counts(key_prefix="errors_") - # if errors contains a "correct" key, use that to normalize, otherwise use the number of training examples - if self.key_error_correct in errors: - errors_total = sum(errors.values()) - else: - errors_total = self.total - res["decoding_errors"] = { - k: v / errors_total if errors_total > 0 else 0.0 for k, v in errors.items() - } - if "all" not in res["decoding_errors"]: - res["decoding_errors"]["all"] = ( - sum(v for k, v in errors.items() if 
k != self.key_error_correct) / errors_total - if errors_total > 0 - else 0.0 - ) - - for layer_name, metric in self.layer_metrics.items(): - if layer_name in res: - raise ValueError( - f"Layer name '{layer_name}' is already used in the metric result dictionary." - ) - res[layer_name] = metric.compute() - - res = self._nested_round(res) - - return res diff --git a/src/pie_modules/taskmodules/metrics/wrapped_metric_with_prepare_function.py b/src/pie_modules/taskmodules/metrics/wrapped_metric_with_prepare_function.py deleted file mode 100644 index 7daceb9dd..000000000 --- a/src/pie_modules/taskmodules/metrics/wrapped_metric_with_prepare_function.py +++ /dev/null @@ -1,129 +0,0 @@ -import logging -from collections.abc import Collection, Sized -from typing import Any, Callable, Dict, Generic, List, Optional, Tuple, TypeVar, Union - -from torch import Tensor -from torchmetrics import Metric, MetricCollection -from torchmetrics.wrappers.abstract import WrapperMetric - -logger = logging.getLogger(__name__) - -T = TypeVar("T") -T2 = TypeVar("T2") - - -class WrappedMetricWithPrepareFunction(WrapperMetric, Generic[T]): - """A wrapper around a metric that can be used with predictions and targets that are need to be - prepared (e.g. un-batched) before passing them to the metric. - - Args: - metric: The metric to wrap. It should be a subclass of torchmetrics.Metric. - prepare_function: A function that prepares the input for the metric. If provided, It is called with - the predictions as well as the targets (separately). - prepare_together_function: A function that prepares both the predictions and the targets together and - should return them as a tuple. If provided, it is called with the predictions and the targets as - arguments. - prepare_does_unbatch: If True, the prepare_function is expected to return an iterable of - individual inputs. This can be used to un-batch the input before passing it to the - wrapped metric. 
- """ - - def __init__( - self, - metric: Union[Metric, MetricCollection], - prepare_function: Optional[Callable[[T], Any]] = None, - prepare_together_function: Optional[Callable[[T, T], Tuple[Any, Any]]] = None, - prepare_does_unbatch: bool = False, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.metric = metric - self.prepare_function = prepare_function - self.prepare_both_function = prepare_together_function - self.prepare_does_unbatch = prepare_does_unbatch - - def _is_empty_batch(self, prediction: T2, target: T2) -> bool: - if isinstance(prediction, Sized) and isinstance(target, Sized): - pred_len = len(prediction) - target_len = len(target) - else: - raise ValueError( - "Both prediction and target need to be sized when prepare_does_unbatch=False." - ) - if pred_len != target_len: - raise ValueError( - f"Number of elements in prediction ({pred_len}) and target ({target_len}) do not match." - ) - if pred_len == 0: - return True - return False - - def forward(self, prediction: T, target: T) -> Any: - if self.prepare_function is not None: - prediction = self.prepare_function(prediction) - target = self.prepare_function(target) - if self.prepare_both_function is not None: - prediction, target = self.prepare_both_function(prediction, target) - if self.prepare_does_unbatch: - if not isinstance(prediction, Collection) or not isinstance(target, Collection): - raise ValueError( - "Both prediction and target need to be iterable and sized when prepare_does_unbatch=True." - ) - if len(prediction) != len(target): - raise ValueError( - f"Number of prepared predictions ({len(prediction)}) and targets " - f"({len(target)}) do not match." 
- ) - if len(prediction) == 0: - raise ValueError("Empty batch.") - results = [] - for prediction_str, target_str in zip(prediction, target): - current_result = self.metric(prediction_str, target_str) - results.append(current_result) - return results - else: - if not self._is_empty_batch(prediction, target): - return self.metric(prediction, target) - else: - return None - - def update(self, prediction: T, target: T) -> None: - if self.prepare_function is not None: - prediction = self.prepare_function(prediction) - target = self.prepare_function(target) - if self.prepare_both_function is not None: - prediction, target = self.prepare_both_function(prediction, target) - if self.prepare_does_unbatch: - if not isinstance(prediction, Collection) or not isinstance(target, Collection): - raise ValueError( - "Both prediction and target need to be iterable and sized when prepare_does_unbatch=True." - ) - if len(prediction) != len(target): - raise ValueError( - f"Number of prepared predictions ({len(prediction)}) and targets " - f"({len(target)}) do not match." 
- ) - if len(prediction) == 0: - raise ValueError("Empty batch.") - for prediction_str, target_str in zip(prediction, target): - self.metric.update(prediction_str, target_str) - else: - if not self._is_empty_batch(prediction, target): - self.metric.update(prediction, target) - - def compute(self) -> Any: - return self.metric.compute() - - def reset(self) -> None: - self.metric.reset() - - @property - def metric_state(self) -> Dict[str, Union[List[Tensor], Tensor]]: - if isinstance(self.metric, Metric): - return self.metric.metric_state - elif isinstance(self.metric, MetricCollection): - return { - metric_name: metric.metric_state for metric_name, metric in self.metric.items() - } - else: - raise ValueError(f"Unsupported metric type: {type(self.metric)}") diff --git a/src/pie_modules/taskmodules/pointer_network/__init__.py b/src/pie_modules/taskmodules/pointer_network/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/pie_modules/taskmodules/pointer_network/annotation_encoder_decoder.py b/src/pie_modules/taskmodules/pointer_network/annotation_encoder_decoder.py deleted file mode 100644 index 5d48629bd..000000000 --- a/src/pie_modules/taskmodules/pointer_network/annotation_encoder_decoder.py +++ /dev/null @@ -1,397 +0,0 @@ -import logging -from collections import defaultdict -from typing import Any, Dict, List, Optional, Set, Tuple - -from pie_modules.annotations import BinaryRelation, LabeledSpan, Span -from pie_modules.taskmodules.common import AnnotationEncoderDecoder, DecodingException - -logger = logging.getLogger(__name__) - - -class DecodingLengthException(DecodingException[List[int]]): - identifier = "len" - - -class DecodingOrderException(DecodingException[List[int]]): - identifier = "order" - - -class DecodingSpanOverlapException(DecodingException[List[int]]): - identifier = "overlap" - - -class DecodingLabelException(DecodingException[List[int]]): - identifier = "label" - - -class 
DecodingNegativeIndexException(DecodingException[List[int]]): - identifier = "index" - - -KEY_INVALID_CORRECT = "correct" - - -class SpanEncoderDecoder(AnnotationEncoderDecoder[Span, List[int]]): - def __init__(self, exclusive_end: bool = True): - self.exclusive_end = exclusive_end - - def encode(self, annotation: Span, metadata: Optional[Dict[str, Any]] = None) -> List[int]: - end_idx = annotation.end - if not self.exclusive_end: - end_idx -= 1 - return [annotation.start, end_idx] - - def decode(self, encoding: List[int], metadata: Optional[Dict[str, Any]] = None) -> Span: - if len(encoding) != 2: - raise DecodingLengthException( - f"two values are required to decode as Span, but encoding has length {len(encoding)}", - encoding=encoding, - ) - end_idx = encoding[1] - if not self.exclusive_end: - end_idx += 1 - if end_idx < encoding[0]: - raise DecodingOrderException( - f"end index can not be smaller than start index, but got: start={encoding[0]}, " - f"end={end_idx}", - encoding=encoding, - ) - if any(idx < 0 for idx in encoding): - raise DecodingNegativeIndexException( - f"indices must be positive, but got: {encoding}", encoding=encoding - ) - return Span(start=encoding[0], end=end_idx) - - -class SpanEncoderDecoderWithOffset(SpanEncoderDecoder): - def __init__(self, offset: int, **kwargs): - super().__init__(**kwargs) - self.offset = offset - - def encode(self, annotation: Span, metadata: Optional[Dict[str, Any]] = None) -> List[int]: - encoding = super().encode(annotation=annotation, metadata=metadata) - return [x + self.offset for x in encoding] - - def decode(self, encoding: List[int], metadata: Optional[Dict[str, Any]] = None) -> Span: - encoding = [x - self.offset for x in encoding] - return super().decode(encoding=encoding, metadata=metadata) - - -class LabeledSpanEncoderDecoder(AnnotationEncoderDecoder[LabeledSpan, List[int]]): - def __init__( - self, - span_encoder_decoder: AnnotationEncoderDecoder[Span, List[int]], - label2id: Dict[str, int], - mode: 
str, - ): - self.span_encoder_decoder = span_encoder_decoder - self.label2id = label2id - self.id2label = {idx: label for label, idx in self.label2id.items()} - self.mode = mode - - def encode( - self, annotation: LabeledSpan, metadata: Optional[Dict[str, Any]] = None - ) -> List[int]: - encoded_span = self.span_encoder_decoder.encode(annotation=annotation, metadata=metadata) - encoded_label = self.label2id[annotation.label] - if self.mode == "indices_label": - return encoded_span + [encoded_label] - elif self.mode == "label_indices": - return [encoded_label] + encoded_span - else: - raise ValueError(f"unknown mode: {self.mode}") - - def decode( - self, encoding: List[int], metadata: Optional[Dict[str, Any]] = None - ) -> LabeledSpan: - if self.mode == "label_indices": - encoded_label = encoding[0] - encoded_span = encoding[1:] - elif self.mode == "indices_label": - encoded_label = encoding[-1] - encoded_span = encoding[:-1] - else: - raise ValueError(f"unknown mode: {self.mode}") - - decoded_span = self.span_encoder_decoder.decode(encoding=encoded_span, metadata=metadata) - if encoded_label not in self.id2label: - raise DecodingLabelException( - f"unknown label id: {encoded_label} (label2id: {self.label2id})", encoding=encoding - ) - result = LabeledSpan( - start=decoded_span.start, - end=decoded_span.end, - label=self.id2label[encoded_label], - ) - return result - - -class BinaryRelationEncoderDecoder(AnnotationEncoderDecoder[BinaryRelation, List[int]]): - def __init__( - self, - head_encoder_decoder: AnnotationEncoderDecoder[Span, List[int]], - tail_encoder_decoder: AnnotationEncoderDecoder[Span, List[int]], - label2id: Dict[str, int], - mode: str, - loop_dummy_relation_name: Optional[str] = None, - none_label: Optional[str] = None, - ): - self.head_encoder_decoder = head_encoder_decoder - self.tail_encoder_decoder = tail_encoder_decoder - self.loop_dummy_relation_name = loop_dummy_relation_name - self.none_label = none_label - self.label2id = label2id - 
self.id2label = {idx: label for label, idx in self.label2id.items()} - self.mode = mode - - def encode( - self, annotation: BinaryRelation, metadata: Optional[Dict[str, Any]] = None - ) -> List[int]: - encoded_head = self.head_encoder_decoder.encode(annotation=annotation.head) - encoded_tail = self.tail_encoder_decoder.encode(annotation=annotation.tail) - - if ( - self.loop_dummy_relation_name is not None - and annotation.label == self.loop_dummy_relation_name - ): - if annotation.head != annotation.tail: - raise ValueError( - f"expected head == tail for loop_dummy_relation, but got: {annotation.head}, " - f"{annotation.tail}" - ) - if self.none_label is None: - raise ValueError( - f"loop_dummy_relation_name is set, but none_label is not set: {self.none_label}" - ) - none_id = self.label2id[self.none_label] - encoded_none_argument = [none_id, none_id, none_id] - if self.mode == "head_tail_label": - return encoded_head + encoded_none_argument + [none_id] - elif self.mode == "tail_head_label": - return encoded_tail + encoded_none_argument + [none_id] - elif self.mode == "label_head_tail": - return [none_id] + encoded_head + encoded_none_argument - elif self.mode == "label_tail_head": - return [none_id] + encoded_tail + encoded_none_argument - else: - raise ValueError(f"unknown mode: {self.mode}") - else: - encoded_label = self.label2id[annotation.label] - if self.mode == "tail_head_label": - return encoded_tail + encoded_head + [encoded_label] - elif self.mode == "head_tail_label": - return encoded_head + encoded_tail + [encoded_label] - elif self.mode == "label_head_tail": - return [encoded_label] + encoded_head + encoded_tail - elif self.mode == "label_tail_head": - return [encoded_label] + encoded_tail + encoded_head - else: - raise ValueError(f"unknown mode: {self.mode}") - - def is_single_span_label(self, label: str) -> bool: - return self.none_label is not None and label == self.none_label - - def decode( - self, encoding: List[int], metadata: 
Optional[Dict[str, Any]] = None - ) -> BinaryRelation: - if len(encoding) != 7: - raise DecodingLengthException( - f"seven values are required to decode as BinaryRelation, but the encoding has length {len(encoding)}", - encoding=encoding, - ) - if self.mode.endswith("_label"): - encoded_label = encoding[6] - encoded_arguments = encoding[:6] - argument_mode = self.mode[: -len("_label")] - elif self.mode.startswith("label_"): - encoded_label = encoding[0] - encoded_arguments = encoding[1:] - argument_mode = self.mode[len("label_") :] - else: - raise ValueError(f"unknown mode: {self.mode}") - if encoded_label not in self.id2label: - raise DecodingLabelException( - f"unknown label id: {encoded_label} (label2id: {self.label2id})", encoding=encoding - ) - label = self.id2label[encoded_label] - if self.is_single_span_label(label=label): - if argument_mode == "head_tail": - span_encoder = self.head_encoder_decoder - elif argument_mode == "tail_head": - span_encoder = self.tail_encoder_decoder - else: - raise ValueError(f"unknown argument mode: {argument_mode}") - encoded_span = encoded_arguments[:3] - span = span_encoder.decode(encoding=encoded_span, metadata=metadata) - if self.loop_dummy_relation_name is None: - raise ValueError( - f"loop_dummy_relation_name is not set, but none_label={self.none_label} " - f"was found in decoded encoding: {encoding} (label2id: {self.label2id}))" - ) - rel = BinaryRelation(head=span, tail=span, label=self.loop_dummy_relation_name) - else: - if argument_mode == "head_tail": - encoded_head = encoded_arguments[:3] - encoded_tail = encoded_arguments[3:] - elif argument_mode == "tail_head": - encoded_tail = encoded_arguments[:3] - encoded_head = encoded_arguments[3:] - else: - raise ValueError(f"unknown argument mode: {argument_mode}") - head = self.head_encoder_decoder.decode(encoding=encoded_head, metadata=metadata) - tail = self.tail_encoder_decoder.decode(encoding=encoded_tail, metadata=metadata) - rel = BinaryRelation(head=head, 
tail=tail, label=label) - - return rel - - def build_decoding_constraints( - self, partial_encoding: List[int] - ) -> Tuple[Optional[Set[int]], Optional[Set[int]]]: - """Given a partial encoding, build the constraints for the next encoding step. - - Returns: - Tuple[Optional[Set[int]], Optional[Set[int]]]: A tuple of two sets of integers representing the allowed - and disallowed next indices. The first set contains the allowed indices, and the second set contains - the disallowed indices. If no constraints are needed, both sets can be None. - """ - allowed = None - disallowed = None - - if self.mode != "tail_head_label": - raise NotImplementedError( - f"build_decoder_constraints is not implemented for mode {self.mode}" - ) - - if self.none_label not in self.label2id: - raise ValueError( - f"none_label not found in label2id: {self.label2id} (none_label: {self.none_label})" - ) - none_id = self.label2id[self.none_label] - if self.head_encoder_decoder != self.tail_encoder_decoder: - raise NotImplementedError( - "head and tail encoder/decoder must be the same for build_decoder_constraints" - ) - - if not isinstance(self.head_encoder_decoder, LabeledSpanEncoderDecoder): - raise NotImplementedError( - "head and tail encoder/decoder must be LabeledSpanEncoderDecoder for build_decoder_constraints" - ) - if not isinstance( - self.head_encoder_decoder.span_encoder_decoder, SpanEncoderDecoderWithOffset - ): - raise NotImplementedError( - "head and tail encoder/decoder must be SpanEncoderDecoderWithOffset for build_decoder_constraints" - ) - pointer_offset = self.head_encoder_decoder.span_encoder_decoder.offset - if self.head_encoder_decoder.mode != "indices_label": - raise NotImplementedError( - "head and tail encoder/decoder must be indices_label for build_decoder_constraints" - ) - if ( - not isinstance(self.head_encoder_decoder.span_encoder_decoder, SpanEncoderDecoder) - or self.head_encoder_decoder.span_encoder_decoder.exclusive_end - ): - raise NotImplementedError( - 
"head and tail encoder/decoder must be exclusive_end for build_decoder_constraints" - ) - span_ids = set(self.head_encoder_decoder.label2id.values()) - relation_ids = set(self.label2id.values()) - {self.label2id[self.none_label]} - contains_none = none_id in partial_encoding - idx = len(partial_encoding) - if idx == 0: # [] -> first span start or eos - # Disallow all labels: - disallowed = set(range(pointer_offset)) - elif idx == 1: # [14] -> first span end - # Allow all offsets greater than the span start. - span_start = partial_encoding[-1] - # result[span_start:] = 1 - disallowed = set(range(span_start)) - # Disallow the none label: - disallowed.add(none_id) - elif idx == 2: # [14,14] -> first span label - # Allow only span ids. - allowed = span_ids - elif idx == 3: # [14,14,s1] -> second span start or none - # Disallow overlap of first and second span: - first_span_start = partial_encoding[0] - first_span_end = partial_encoding[1] + 1 - disallowed = set(range(first_span_start, first_span_end)) - # Disallow all span labels: - disallowed.update(span_ids) - # Disallow all relation labels: - disallowed.update(relation_ids) - # But allow the none label: - disallowed.discard(none_id) - - elif idx == 4: # [14,14,s1,23] -> second span end or none - # if we have a none label, allow only none - if contains_none: - allowed = {none_id} - else: - - first_span_start = partial_encoding[0] - # first_span_end = partial_encoding[1] + 1 - second_span_start = partial_encoding[-1] - # if first span is after the second span, - if second_span_start < first_span_start: - # just allow the offsets between the two spans: - allowed = set(range(second_span_start, first_span_start)) - else: - # otherwise, disallow all offsets before the second span start: - disallowed = set(range(second_span_start)) - - # Disallow all span labels: - disallowed.update(span_ids) - # Disallow all relation labels: - disallowed.update(relation_ids) - - elif idx == 5: # [14,14,s1,23,25] -> second span label or 
none - # if we have a none label, allow only none - if contains_none: - # result[none_id] = 1 - allowed = {none_id} - else: - # allow only span ids - allowed = span_ids - elif idx == 6: # [14,14,s1,23,25,s2] -> relation label or none - # if we have a none label, allow only none - if contains_none: - allowed = {none_id} - else: - # allow only relation ids - allowed = relation_ids - else: - raise ValueError( - f"unknown partial encoding length: {len(partial_encoding)} (encoding: {partial_encoding})" - ) - - return allowed, disallowed - - def parse(self, encoding: List[int]) -> Tuple[List[BinaryRelation], Dict[str, int], List[int]]: - errors: Dict[str, int] = defaultdict(int) - if self.none_label is None: - raise ValueError( - f"none_label is not set, but is required for parsing: {self.none_label}" - ) - none_id = self.label2id[self.none_label] - relation_ids = set(self.label2id.values()) - {none_id} - encodings = [] - current_encoding: List[int] = [] - valid_encoding: BinaryRelation - if len(encoding): - for i in encoding: - current_encoding.append(i) - # An encoding is complete when it ends with a relation_id - # or when it contains a none_id and has a length of 7 - if i in relation_ids or (i == none_id and len(current_encoding) == 7): - # try to decode the current relation encoding - try: - valid_encoding = self.decode(encoding=current_encoding) - encodings.append(valid_encoding) - errors[KEY_INVALID_CORRECT] += 1 - except DecodingException as e: - errors[e.identifier] += 1 - - current_encoding = [] - - return encodings, dict(errors), current_encoding diff --git a/src/pie_modules/taskmodules/pointer_network/logits_processor.py b/src/pie_modules/taskmodules/pointer_network/logits_processor.py deleted file mode 100644 index 777fd9763..000000000 --- a/src/pie_modules/taskmodules/pointer_network/logits_processor.py +++ /dev/null @@ -1,67 +0,0 @@ -import math -from typing import Callable, List - -import torch -from transformers import LogitsProcessor, 
add_start_docstrings -from transformers.generation.logits_process import LOGITS_PROCESSOR_INPUTS_DOCSTRING - - -class PrefixConstrainedLogitsProcessorWithMaximum(LogitsProcessor): - r"""This is similar to [`PrefixConstrainedLogitsProcessor`] but the constraint function gets the - maximum possible index as input. This is useful for Pointer Network where the generated token - can be an index into the input which depends on the length of that input. - - Args: - prefix_allowed_tokens_fn (Callable[[int, torch.LongTensor, int], List[int]]): - Should return the list of token ids allowed at the next generation step, - given (`batch_id`, `input_ids_so_far`, `max_index`). - """ - - def __init__( - self, - prefix_allowed_tokens_fn: Callable[[int, torch.LongTensor, int], List[int]], - num_beams: int, - ): - self._prefix_allowed_tokens_fn = prefix_allowed_tokens_fn - self._num_beams = num_beams - - @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING) - def __call__( - self, input_ids: torch.LongTensor, scores: torch.FloatTensor - ) -> torch.FloatTensor: - if not torch.isfinite(scores).all(): - raise ValueError( - "scores contains ±inf or NaN, which is not allowed by " - "PrefixConstrainedLogitsProcessorWithMaximum. " - "Insert FinitizeLogitsProcessor earlier to clean them." - ) - mask = torch.full_like(scores, -math.inf) - for batch_id, beam_sent in enumerate( - input_ids.view(-1, self._num_beams, input_ids.shape[-1]) - ): - for beam_id, sent in enumerate(beam_sent): - allowed_ids = self._prefix_allowed_tokens_fn(batch_id, sent, mask.size(1)) - if len(allowed_ids) == 0: - raise ValueError( - f"No allowed token ids for batch_id {batch_id}, beam_id {beam_id} with " - f"previous ids: {sent}. This would result in undefined behaviour, " - "so this is not allowed. Please adjust the prefix_allowed_tokens_fn " - "implementation." 
- ) - mask[batch_id * self._num_beams + beam_id, allowed_ids] = 0 - - return scores + mask - - -class FinitizeLogitsProcessor(LogitsProcessor): - r"""Replaces any `±inf` logits with the largest-magnitude finite values for the tensor’s dtype, - ensuring all logits are valid for downstream ops.""" - - @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING) - def __call__( - self, input_ids: torch.LongTensor, scores: torch.FloatTensor - ) -> torch.FloatTensor: - finite_min = torch.finfo(scores.dtype).min - finite_max = torch.finfo(scores.dtype).max - # Use nan_to_num for a fast, fused replacement (PyTorch ≄ 1.8) - return torch.nan_to_num(scores, neginf=finite_min, posinf=finite_max) diff --git a/src/pie_modules/taskmodules/pointer_network_for_end2end_re.py b/src/pie_modules/taskmodules/pointer_network_for_end2end_re.py deleted file mode 100644 index 9ddb4254b..000000000 --- a/src/pie_modules/taskmodules/pointer_network_for_end2end_re.py +++ /dev/null @@ -1,865 +0,0 @@ -import dataclasses -import json -import logging -from collections import Counter, defaultdict -from functools import cmp_to_key -from typing import ( - Any, - Dict, - Iterable, - Iterator, - List, - Optional, - Sequence, - Set, - Tuple, - Type, - Union, -) - -import torch -from pie_core import ( - Annotation, - AnnotationLayer, - Document, - TaskEncoding, - TaskModule, -) -from pie_core.taskmodule import ( - InputEncoding, - ModelBatchOutput, - TargetEncoding, - TaskBatchEncoding, -) -from pie_core.utils.hydra import resolve_type -from torchmetrics import Metric -from transformers import AutoTokenizer, LogitsProcessorList, PreTrainedTokenizer -from typing_extensions import TypeAlias - -from pie_modules.annotations import BinaryRelation, LabeledSpan - -# import for backwards compatibility (don't remove!) 
-from pie_modules.documents import ( - TextBasedDocument, - TokenBasedDocument, - TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions, -) - -from ..document.processing import token_based_document_to_text_based, tokenize_document -from .common import BatchableMixin, get_first_occurrence_index -from .metrics import ( - PrecisionRecallAndF1ForLabeledAnnotations, - WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction, -) -from .pointer_network.annotation_encoder_decoder import ( - KEY_INVALID_CORRECT, - BinaryRelationEncoderDecoder, - LabeledSpanEncoderDecoder, - SpanEncoderDecoderWithOffset, -) -from .pointer_network.logits_processor import ( - FinitizeLogitsProcessor, - PrefixConstrainedLogitsProcessorWithMaximum, -) - -logger = logging.getLogger(__name__) - - -DocumentType: TypeAlias = TextBasedDocument - - -@dataclasses.dataclass -class InputEncodingType(BatchableMixin): - input_ids: List[int] - attention_mask: List[int] - - -@dataclasses.dataclass -class LabelsAndOptionalConstraints(BatchableMixin): - labels: List[int] - constraints: Optional[List[List[int]]] = None - - @property - def decoder_attention_mask(self) -> List[int]: - return [1] * len(self.labels) - - -TargetEncodingType: TypeAlias = LabelsAndOptionalConstraints -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] -TaskOutputType: TypeAlias = LabelsAndOptionalConstraints - - -def cmp_src_rel(v1: BinaryRelation, v2: BinaryRelation) -> int: - if not all(isinstance(ann, LabeledSpan) for ann in [v1.head, v1.tail, v2.head, v2.tail]): - raise Exception(f"expected LabeledSpan, but got: {v1}, {v2}") - if v1.head.start == v2.head.start: # v1[0]["from"] == v2[0]["from"]: - return v1.tail.start - v2.tail.start # v1[1]["from"] - v2[1]["from"] - return v1.head.start - v2.head.start # v1[0]["from"] - v2[0]["from"] - - -@TaskModule.register() -class PointerNetworkTaskModuleForEnd2EndRE( - TaskModule[ - DocumentType, - InputEncoding, - 
TargetEncoding, - TaskBatchEncoding, - ModelBatchOutput, - TaskOutputType, - ], -): - PREPARED_ATTRIBUTES = ["labels_per_layer"] - REVERSED_RELATION_LABEL_SUFFIX = "_reversed" - - def __init__( - self, - tokenizer_name_or_path: str, - # specific for this use case - document_type: str = "pytorch_ie.documents.TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions", - tokenized_document_type: str = "pie_modules.documents.TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions", - relation_layer_name: str = "binary_relations", - add_reversed_relations: bool = False, - symmetric_relations: Optional[List[str]] = None, - none_label: str = "none", - loop_dummy_relation_name: str = "loop", - constrained_generation: bool = False, - # generic pointer network - label_tokens: Optional[Dict[str, str]] = None, - label_representations: Optional[Dict[str, str]] = None, - labels_per_layer: Optional[Dict[str, List[str]]] = None, - exclude_labels_per_layer: Optional[Dict[str, List[str]]] = None, - # target encoding - create_constraints: bool = False, - # tokenization - tokenizer_init_kwargs: Optional[Dict[str, Any]] = None, - tokenizer_kwargs: Optional[Dict[str, Any]] = None, - partition_layer_name: Optional[str] = None, - annotation_field_mapping: Optional[Dict[str, str]] = None, - # logging - log_first_n_examples: Optional[int] = None, - **kwargs, - ): - super().__init__(**kwargs) - self.save_hyperparameters() - - # tokenization - self._document_type: Type[TextBasedDocument] = resolve_type( - document_type, expected_super_type=TextBasedDocument - ) - self._tokenized_document_type: Type[TokenBasedDocument] = resolve_type( - tokenized_document_type, expected_super_type=TokenBasedDocument - ) - self.tokenizer_name_or_path = tokenizer_name_or_path - self.tokenizer_kwargs = tokenizer_kwargs or {} - self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained( - tokenizer_name_or_path, - **(tokenizer_init_kwargs or {}), - ) - self.annotation_field_mapping = 
annotation_field_mapping or dict() - annotation_field_mapping_inv = {v: k for k, v in self.annotation_field_mapping.items()} - if len(self.annotation_field_mapping) != len(annotation_field_mapping_inv): - raise ValueError( - f"inverted annotation_field_mapping is not unique. annotation_field_mapping: " - f"{self.annotation_field_mapping}" - ) - self.partition_layer_name = partition_layer_name - - # for this specific use case: end-to-end relation extraction - self.relation_layer_name = relation_layer_name - relation_layer_mapped = self.annotation_field_mapping.get( - relation_layer_name, relation_layer_name - ) - relation_layer_target = self.document_type.target_name(relation_layer_mapped) - self.span_layer_name = annotation_field_mapping_inv.get( - relation_layer_target, relation_layer_target - ) - self.add_reversed_relations = add_reversed_relations - self.symmetric_relations = set(symmetric_relations or []) - self.none_label = none_label - self.loop_dummy_relation_name = loop_dummy_relation_name - self.constrained_generation = constrained_generation - # will be set in _post_prepare() - self.relation_encoder_decoder: BinaryRelationEncoderDecoder - - # collected in prepare(), if not passed in - self.labels_per_layer = labels_per_layer - self.exclude_labels_per_layer = exclude_labels_per_layer or {} - - # how to encode and decode the annotations - self.bos_token = self.tokenizer.bos_token - self.eos_token = self.tokenizer.eos_token - self.label_tokens = label_tokens or dict() - self.label_representations = label_representations or dict() - - # target encoding - self.create_constraints = create_constraints - self.pad_values = { - "input_ids": self.tokenizer.pad_token_id, - "attention_mask": 0, - "labels": self.target_pad_id, - "decoder_attention_mask": 0, - "constraints": -1, - } - self.dtypes = { - "input_ids": torch.int64, - "attention_mask": torch.int64, - "labels": torch.int64, - "decoder_attention_mask": torch.int64, - "constraints": torch.int64, - } - - # 
logging - self.log_first_n_examples = log_first_n_examples - - @property - def document_type(self) -> Type[TextBasedDocument]: - return self._document_type - - @property - def tokenized_document_type(self) -> Type[TokenBasedDocument]: - return self._tokenized_document_type - - @property - def layer_names(self) -> List[str]: - return [self.span_layer_name, self.relation_layer_name] - - @property - def special_targets(self) -> list[str]: - return [self.bos_token, self.eos_token] - - @property - def special_target2id(self) -> Dict[str, int]: - return {target: idx for idx, target in enumerate(self.special_targets)} - - @property - def target_pad_id(self) -> int: - return self.special_target2id[self.eos_token] - - def configure_model_generation(self) -> Dict[str, Any]: - result: Dict[str, Any] = {"no_repeat_ngram_size": 7} - if self.constrained_generation: - logits_processor = LogitsProcessorList() - # PrefixConstrainedLogitsProcessorWithMaximum requires finite logits - logits_processor.append(FinitizeLogitsProcessor()) - logits_processor.append( - PrefixConstrainedLogitsProcessorWithMaximum( - prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn_with_maximum, - # use dummy value of 1, this is fine because num_beams affects only the value of batch_id - # which is not used in _prefix_allowed_tokens_fn_with_maximum() - num_beams=1, - ) - ) - result["logits_processor"] = logits_processor - return result - - def _prefix_allowed_tokens_fn_with_maximum( - self, batch_id: int, input_ids: torch.LongTensor, maximum: int - ) -> List[int]: - # remove the first token (bos_token) and use unbatch_output to un-pad the label_ids - label_ids_without_bos = input_ids[1:] - if len(label_ids_without_bos) > 0: - unpadded_label_ids = self.unbatch_output( - {"labels": label_ids_without_bos.unsqueeze(0)} - )[0].labels - else: - unpadded_label_ids = [] - _, _, remaining = self.relation_encoder_decoder.parse(encoding=unpadded_label_ids) - # this is a binary mask - constraint = 
self._build_constraint( - previous_ids=remaining, input_len=maximum - self.pointer_offset - ) - # convert to indices - allowed_indices = torch.nonzero(constraint).squeeze(1) - # convert to a list - return allowed_indices.tolist() - - def add_reversed_relation_labels(self, relation_labels: Iterable[str]) -> Set[str]: - result = set(relation_labels) - for rel_label in set(relation_labels): - if rel_label not in self.symmetric_relations: - reversed_label = rel_label + self.REVERSED_RELATION_LABEL_SUFFIX - if reversed_label in result: - raise ValueError( - f"reversed relation label {reversed_label} already exists in relation layer labels" - ) - result.add(reversed_label) - return result - - def _prepare(self, documents: Sequence[DocumentType]) -> None: - # collect all labels - labels: Dict[str, Set[str]] = {layer_name: set() for layer_name in self.layer_names} - for doc in documents: - for layer_name in self.layer_names: - exclude_labels = self.exclude_labels_per_layer.get(layer_name, []) - labels[layer_name].update( - ac.label for ac in doc[layer_name] if ac.label not in exclude_labels - ) - - if self.add_reversed_relations: - labels[self.relation_layer_name] = self.add_reversed_relation_labels( - relation_labels=labels[self.relation_layer_name] - ) - - self.labels_per_layer = { - # sort labels to ensure deterministic order - layer_name: sorted(labels) - for layer_name, labels in labels.items() - } - - def construct_label_token(self, label: str) -> str: - return self.label_tokens.get(label, f"<<{label}>>") - - def get_label_representation(self, label: str) -> str: - return self.label_representations.get(label, label) - - def _post_prepare(self) -> None: - # set up labels - if self.labels_per_layer is None: - raise Exception("labels_per_layer is not defined. 
Call prepare() first or pass it in.") - self.labels: List[str] = [self.none_label] - for layer_name in self.layer_names: - self.labels.extend(self.labels_per_layer[layer_name]) - if len(set(self.labels)) != len(self.labels): - raise Exception(f"labels are not unique: {self.labels}") - - # set up targets and ids - self.targets: List[str] = self.special_targets + self.labels - self.target2id: Dict[str, int] = {target: idx for idx, target in enumerate(self.targets)} - - # generic ids - self.eos_id: int = self.target2id[self.eos_token] - self.bos_id: int = self.target2id[self.bos_token] - - # span and relation ids - self.span_ids: List[int] = [ - self.target2id[label] for label in self.labels_per_layer[self.span_layer_name] - ] - self.relation_ids: List[int] = [ - self.target2id[label] for label in self.labels_per_layer[self.relation_layer_name] - ] - # the none id is used for the dummy relation which models out-of-relation spans - self.none_id: int = self.target2id[self.none_label] - - # helpers (same as targets / target2id, but only for labels) - self.label2id: Dict[str, int] = {label: self.target2id[label] for label in self.labels} - self.id2label: Dict[int, str] = {idx: label for label, idx in self.label2id.items()} - self.label_ids: List[int] = [self.label2id[label] for label in self.labels] - - # annotation-encoder-decoders - span_encoder_decoder = SpanEncoderDecoderWithOffset( - offset=self.pointer_offset, exclusive_end=False - ) - span_labels = self.labels_per_layer[self.span_layer_name] - labeled_span_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=span_encoder_decoder, - # restrict label2id to get better error messages - label2id={label: idx for label, idx in self.label2id.items() if label in span_labels}, - mode="indices_label", - ) - relation_labels = self.labels_per_layer[self.relation_layer_name] + [ - self.loop_dummy_relation_name, - self.none_label, - ] - self.relation_encoder_decoder = BinaryRelationEncoderDecoder( - 
head_encoder_decoder=labeled_span_encoder_decoder, - tail_encoder_decoder=labeled_span_encoder_decoder, - # restrict label2id to get better error messages - label2id={ - label: idx for label, idx in self.label2id.items() if label in relation_labels - }, - loop_dummy_relation_name=self.loop_dummy_relation_name, - none_label=self.none_label, - mode="tail_head_label", - ) - - label2token = {label: self.construct_label_token(label=label) for label in self.labels} - if len(set(label2token.values())) != len(label2token): - raise Exception(f"label2token values are not unique: {label2token}") - - already_in_vocab = [ - tok - for tok in label2token.values() - if self.tokenizer.convert_tokens_to_ids(tok) != self.tokenizer.unk_token_id - ] - if len(already_in_vocab) > 0: - raise Exception( - f"some special tokens to add (mapped label ids) are already in the tokenizer vocabulary, " - f"this is not allowed: {already_in_vocab}. You may want to adjust the label2special_token mapping" - ) - # sort by length, so that longer tokens are added first - label_tokens_sorted = sorted(label2token.values(), key=lambda x: len(x), reverse=True) - self.tokenizer.add_special_tokens( - special_tokens_dict={"additional_special_tokens": label_tokens_sorted} - ) - - # target tokens are the special tokens plus the mapped label tokens - self.target_tokens: List[str] = self.special_targets + [ - label2token[label] for label in self.labels - ] - self.target_token_ids: List[int] = self.tokenizer.convert_tokens_to_ids(self.target_tokens) - - # construct a mapping from label_token_id to token_ids that will be used to initialize the embeddings - # of the labels - self.label_embedding_weight_mapping = dict() - for label, label_token in label2token.items(): - label_token_indices = self.tokenizer.convert_tokens_to_ids( - self.tokenizer.tokenize(label_token) - ) - # sanity check: label_tokens should not be split up - if len(label_token_indices) > 1: - raise RuntimeError(f"{label_token} wrong split") - else: - 
label_token_idx = label_token_indices[0] - - label_representation = self.get_label_representation(label) - source_indices = self.tokenizer.convert_tokens_to_ids( - self.tokenizer.tokenize(label_representation) - ) - if self.tokenizer.unk_token_id in source_indices: - raise RuntimeError( - f"tokenized label_token={label_token} [{source_indices}] contains unk_token" - ) - self.label_embedding_weight_mapping[label_token_idx] = source_indices - - @property - def pointer_offset(self) -> int: - return len(self.targets) - - @property - def target_ids(self) -> Set[int]: - return set(range(self.pointer_offset)) - - def configure_model_metric(self, stage: Optional[str] = None) -> Optional[Metric]: - layer_metrics = { - layer_name: PrecisionRecallAndF1ForLabeledAnnotations() - for layer_name in self.layer_names - } - - return WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction( - unbatch_function=self.unbatch_output, - decode_layers_with_errors_function=self.decode_annotations, - layer_metrics=layer_metrics, - error_key_correct=KEY_INVALID_CORRECT, - ) - - def reverse_relation(self, relation: Annotation) -> BinaryRelation: - if isinstance(relation, BinaryRelation): - reversed_label = relation.label - if ( - reversed_label not in self.symmetric_relations - and reversed_label != self.none_label - ): - reversed_label += self.REVERSED_RELATION_LABEL_SUFFIX - reversed_rel = relation.copy( - head=relation.tail, tail=relation.head, label=reversed_label - ) - return reversed_rel - else: - raise Exception(f"reversing of relations of type {type(relation)} is not supported") - - def unreverse_relation(self, relation: Annotation) -> BinaryRelation: - if isinstance(relation, BinaryRelation): - head, tail, label = relation.head, relation.tail, relation.label - # if the relation is symmetric, we sort head and tail to ensure consistent order - if relation.label in self.symmetric_relations: - head, tail = sorted([head, tail], key=lambda x: (x.start, x.end)) - # if the relation was 
reversed, we need to reconstruct the original label and swap head and tail - elif label.endswith(self.REVERSED_RELATION_LABEL_SUFFIX): - # reconstruct the original label and swap head and tail - label = label[: -len(self.REVERSED_RELATION_LABEL_SUFFIX)] - head, tail = tail, head - return relation.copy(head=head, tail=tail, label=label) - else: - raise Exception(f"un-reversing of relations of type {type(relation)} is not supported") - - def encode_annotations( - self, layers: Dict[str, Iterable[Annotation]], metadata: Optional[Dict[str, Any]] = None - ) -> TaskOutputType: - if not set(layers.keys()) == set(self.layer_names): - raise Exception(f"unexpected layers: {layers.keys()}. expected: {self.layer_names}") - - if self.labels_per_layer is None: - raise Exception("labels_per_layer is not defined. Call prepare() first or pass it in.") - - # encode relations - all_relation_arguments = set() - relation_arguments2label: Dict[Tuple[Annotation, ...], str] = dict() - relation_encodings = dict() - for rel in layers[self.relation_layer_name]: - if not isinstance(rel, BinaryRelation): - raise Exception(f"expected BinaryRelation, but got: {rel}") - if rel.label in self.labels_per_layer[self.relation_layer_name]: - if (rel.head, rel.tail) in relation_arguments2label: - previous_label = relation_arguments2label[(rel.head, rel.tail)] - if previous_label != rel.label: - raise ValueError( - f"relation {rel.head} -> {rel.tail} already exists, but has another label: " - f"{previous_label} (current label: {rel.label})." 
- ) - continue - encoded_relation = self.relation_encoder_decoder.encode( - annotation=rel, metadata=metadata - ) - if encoded_relation is None: - raise Exception(f"failed to encode relation: {rel}") - relation_encodings[rel] = encoded_relation - all_relation_arguments.update([rel.head, rel.tail]) - relation_arguments2label[(rel.head, rel.tail)] = rel.label - - # encode spans that are not arguments of any relation - no_relation_spans = [ - span for span in layers[self.span_layer_name] if span not in all_relation_arguments - ] - for span in no_relation_spans: - dummy_relation = BinaryRelation( - head=span, tail=span, label=self.loop_dummy_relation_name - ) - encoded_relation = self.relation_encoder_decoder.encode( - annotation=dummy_relation, metadata=metadata - ) - if encoded_relation is not None: - relation_encodings[dummy_relation] = encoded_relation - - # sort relations by start indices of head and tail # TODO: is this correct? - sorted_relations = sorted(relation_encodings, key=cmp_to_key(cmp_src_rel)) - - # this should never be accessed as it is, so use negative pointer offset to provoke an error - input_len = -self.pointer_offset - 1 - if self.create_constraints: - if metadata is None or "src_len" not in metadata: - raise Exception("metadata with 'src_len' is required to create constraints") - input_len = metadata["src_len"] - - # build target_ids - target_ids = [] - constraints_list = [] - for rel in sorted_relations: - encoded_relation = relation_encodings[rel] - target_ids.extend(encoded_relation) - - if self.create_constraints: - # iterate over all prefixes of the relation encoding - for idx, t in enumerate(encoded_relation): - # get the constraints for the current prefix - current_constraints = self._build_constraint( - previous_ids=encoded_relation[:idx], input_len=input_len - ) - # sanity check - if current_constraints[t] == 0: - raise Exception( - f"current_constraints[{t}] is 0, but should be 1: {current_constraints}" - ) - # add the constraints to 
the list - constraints_list.append(current_constraints) - - target_ids.append(self.eos_id) - - if self.create_constraints: - # add constraints for the eos_id - eos_constraint = torch.zeros(input_len + self.pointer_offset, dtype=torch.int64) - eos_constraint[self.eos_id] = 1 - constraints_list.append(eos_constraint) - # combine all constraints - constraints = torch.stack(constraints_list).tolist() - else: - constraints = None - - # sanity check - _, encoding_errors, remaining = self.relation_encoder_decoder.parse(encoding=target_ids) - if ( - not all(v == 0 for k, v in encoding_errors.items() if k != "correct") - or len(remaining) > 0 - ): - decoded, invalid = self.decode_annotations(LabelsAndOptionalConstraints(target_ids)) - not_encoded = {} - for layer_name in layers: - # convert to dicts to make them comparable (original annotations are attached which breaks comparison) - decoded_dicts = [ann.asdict() for ann in decoded[layer_name]] - # filter annotations and convert to str to make them json serializable - filtered = { - str(ann) for ann in layers[layer_name] if ann.asdict() not in decoded_dicts - } - if len(filtered) > 0: - not_encoded[layer_name] = list(filtered) - if len(not_encoded) > 0: - logger.warning( - f"encoding errors: {encoding_errors}, skipped annotations:\n" - f"{json.dumps(not_encoded, sort_keys=True, indent=2)}" - ) - elif len([tag for tag in remaining if tag != self.eos_id]) > 0: - logger.warning( - f"encoding errors: {encoding_errors}, remaining encoding ids: {remaining}" - ) - - return LabelsAndOptionalConstraints(labels=target_ids, constraints=constraints) - - def decode_annotations( - self, encoding: TaskOutputType - ) -> Tuple[Dict[str, Iterable[Annotation]], Dict[str, int]]: - decoded_relations, errors, remaining = self.relation_encoder_decoder.parse( - encoding=encoding.labels - ) - relation_tuples: List[Tuple[Annotation, Annotation, str]] = [] - entity_labels: Dict[Annotation, List[str]] = defaultdict(list) - for rel in 
decoded_relations: - head_dummy = rel.head.copy(label="dummy") - entity_labels[head_dummy].append(rel.head.label) - - if rel.label != self.loop_dummy_relation_name: - tail_dummy = rel.tail.copy(label="dummy") - entity_labels[tail_dummy].append(rel.tail.label) - relation_tuples.append((head_dummy, tail_dummy, rel.label)) - else: - assert rel.head == rel.tail - - # It may happen that some spans take part in multiple relations, but got generated with different labels. - # In this case, we just create one span and take the most common label. - entities: Dict[Annotation, Annotation] = {} - for entity_dummy, labels in entity_labels.items(): - c = Counter(labels) - # if len(c) > 1: - # logger.warning(f"multiple labels for span, take the most common: {dict(c)}") - most_common_label = c.most_common(1)[0][0] - entities[entity_dummy] = entity_dummy.copy(label=most_common_label) - - entity_layer = list(entities.values()) - relation_layer = [ - BinaryRelation(head=entities[head_dummy], tail=entities[tail_dummy], label=label) - for head_dummy, tail_dummy, label in relation_tuples - ] - return { - self.span_layer_name: entity_layer, - self.relation_layer_name: relation_layer, - }, errors - - def _build_constraint( - self, - previous_ids: List[int], - input_len: int, - ) -> torch.LongTensor: - """Build a constraint for the decoder. The constraint is a binary mask that indicates which - ids are allowed to be predicted in the next decoding step. The mask is of size input_len + - pointer_offset, where input_len is the length of the input sequence and pointer_offset is - the number of labels and special tokens. Uses the relation_encoder_decoder to build the - actual constraints. - - Args: - previous_ids: previously decoded ids - input_len: length of the input sequence - - Returns: - A binary mask of size input_len + pointer_offset, where 1 indicates that the id is - allowed to be predicted next, and 0 indicates that the id is not allowed to be predicted next. 
- """ - result: torch.LongTensor = torch.zeros(input_len + self.pointer_offset, dtype=torch.int64) - if self.eos_id in previous_ids: - # once eos is predicted, only allow padding - result[self.target_pad_id] = 1 - return result - - allowed_ids, disallowed_ids = self.relation_encoder_decoder.build_decoding_constraints( - partial_encoding=previous_ids - ) - if allowed_ids is not None and disallowed_ids is not None: - raise Exception( - f"allowed_ids and disallowed_ids are both not None: {allowed_ids}, {disallowed_ids}" - ) - elif allowed_ids is not None: - for allowed_id in allowed_ids: - result[allowed_id] = 1 - elif disallowed_ids is not None: - for id in range(len(result)): - if id not in disallowed_ids: - result[id] = 1 - else: - raise Exception( - f"allowed_ids and disallowed_ids are both None: {allowed_ids}, {disallowed_ids}" - ) - if len(previous_ids) == 0: - # if there are no previous ids, we also allow the eos_id - result[self.eos_id] = 1 - else: - # if there are previous ids, we don't allow the eos_id - result[self.eos_id] = 0 - # never allow the bos_id - result[self.bos_id] = 0 - - return result - - def maybe_log_example( - self, - task_encoding: TaskEncodingType, - targets: Optional[TargetEncodingType] = None, - ): - if self.log_first_n_examples is not None and self.log_first_n_examples > 0: - tokenized_doc_id = task_encoding.metadata["tokenized_document"].id - inputs = task_encoding.inputs - targets = targets or task_encoding.targets - input_tokens = self.tokenizer.convert_ids_to_tokens(inputs.input_ids) - label_tokens = [ - ( - self.targets[target_id_or_offset] - if target_id_or_offset < self.pointer_offset - else str(target_id_or_offset) - + " {" - + str(input_tokens[target_id_or_offset - self.pointer_offset]) - + "}" - ) - for target_id_or_offset in targets.labels - ] - logger.info("*** Example ***") - logger.info(f"doc.id: {tokenized_doc_id}") - logger.info(f"input_ids: {' '.join([str(i) for i in inputs.input_ids])}") - logger.info(f"input_tokens: {' 
'.join(input_tokens)}") - logger.info(f"label_ids: {' '.join([str(i) for i in targets.labels])}") - logger.info(f"label_tokens: {' '.join(label_tokens)}") - if self.create_constraints: - # only show the shape because the content is not very readable - logger.info( - f"constraints: {torch.tensor(targets.constraints).shape} (content is omitted)" - ) - self.log_first_n_examples -= 1 - - def tokenize_document(self, document: DocumentType) -> List[TokenBasedDocument]: - field_mapping = dict(self.annotation_field_mapping) - if self.partition_layer_name is not None: - field_mapping[self.partition_layer_name] = "labeled_partitions" - partition_layer = "labeled_partitions" - else: - partition_layer = None - casted_document = document.as_type(self.document_type, field_mapping=field_mapping) - tokenized_docs = tokenize_document( - casted_document, - tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - partition_layer=partition_layer, - **self.tokenizer_kwargs, - ) - for idx, tokenized_doc in enumerate(tokenized_docs): - tokenized_doc.id = f"{document.id}-tokenized-{idx+1}-of-{len(tokenized_docs)}" - - return tokenized_docs - - def encode_input( - self, document: DocumentType, is_training: bool = False - ) -> Optional[Union[TaskEncodingType, Sequence[TaskEncodingType]]]: - tokenized_docs = self.tokenize_document(document) - task_encodings: List[TaskEncodingType] = [] - for tokenized_doc in tokenized_docs: - tokenizer_encoding = tokenized_doc.metadata["tokenizer_encoding"] - task_encodings.append( - TaskEncoding( - document=document, - inputs=InputEncodingType( - input_ids=tokenizer_encoding.ids, - attention_mask=tokenizer_encoding.attention_mask, - ), - metadata={"tokenized_document": tokenized_doc}, - ) - ) - - return task_encodings - - def get_mapped_layer(self, document: Document, layer_name: str) -> AnnotationLayer: - if layer_name in self.annotation_field_mapping: - layer_name = self.annotation_field_mapping[layer_name] - return 
document[layer_name] - - def encode_target(self, task_encoding: TaskEncodingType) -> Optional[TargetEncodingType]: - try: - document = task_encoding.metadata["tokenized_document"] - - layers = { - layer_name: self.get_mapped_layer(document, layer_name=layer_name) - for layer_name in self.layer_names - } - - if self.add_reversed_relations: - # create a copy to avoid modifying the annotation layer in the document - relations = list(layers[self.relation_layer_name]) - reversed_relations = [self.reverse_relation(rel) for rel in relations] - layers[self.relation_layer_name] = relations + reversed_relations - - result = self.encode_annotations( - layers=layers, - metadata={ - **task_encoding.metadata, - "src_len": len(task_encoding.inputs.input_ids), - }, - ) - - self.maybe_log_example(task_encoding=task_encoding, targets=result) - return result - except Exception as e: - logger.error(f"failed to encode target, it will be skipped: {e}") - return None - - def collate(self, task_encodings: Sequence[TaskEncodingType]) -> TaskBatchEncoding: - if len(task_encodings) == 0: - raise ValueError("no task_encodings available") - inputs = InputEncodingType.batch( - values=[x.inputs for x in task_encodings], - dtypes=self.dtypes, - pad_values=self.pad_values, - ) - - targets = None - if task_encodings[0].has_targets: - targets = TargetEncodingType.batch( - values=[x.targets for x in task_encodings], - dtypes=self.dtypes, - pad_values=self.pad_values, - ) - - return inputs, targets - - def unbatch_output(self, model_output: ModelBatchOutput) -> Sequence[TaskOutputType]: - labels = model_output["labels"] - batch_size = labels.size(0) - - # We use the position after the first eos token as the seq_len. - # Note that, if eos_id is not in model_output for a given batch item, the result will be - # model_output.size(1) + 1 (i.e. seq_len + 1) for that batch item. This is fine, because we use the - # seq_lengths just to truncate the output and want to keep everything if eos_id is not present. 
- seq_lengths = get_first_occurrence_index(labels, self.eos_id) + 1 - - result = [ - LabelsAndOptionalConstraints(labels[i, : seq_lengths[i]].to(device="cpu").tolist()) - for i in range(batch_size) - ] - return result - - def create_annotations_from_output( - self, - task_encoding: TaskEncodingType, - task_output: TaskOutputType, - ) -> Iterator[Tuple[str, Annotation]]: - layers, errors = self.decode_annotations( - encoding=task_output, # metadata=task_encoding.metadata - ) - tokenized_document = task_encoding.metadata["tokenized_document"] - - # Note: token_based_document_to_text_based() does not yet consider predictions, so we need to clear - # the main annotations and attach the predictions to that - for layer_name, annotations in layers.items(): - layer = self.get_mapped_layer(tokenized_document, layer_name=layer_name) - layer.clear() - layer.extend(annotations) - - untokenized_document = token_based_document_to_text_based( - tokenized_document, result_document_type=self.document_type - ) - - for layer_name in layers: - annotations = self.get_mapped_layer(untokenized_document, layer_name=layer_name) - for annotation in annotations: - # handle relations that may be reversed - if layer_name == self.relation_layer_name and self.add_reversed_relations: - unreversed_relation = self.unreverse_relation(annotation) - yield layer_name, unreversed_relation - else: - yield layer_name, annotation.copy() diff --git a/src/pie_modules/taskmodules/re_span_pair_classification.py b/src/pie_modules/taskmodules/re_span_pair_classification.py deleted file mode 100644 index ad259bc3d..000000000 --- a/src/pie_modules/taskmodules/re_span_pair_classification.py +++ /dev/null @@ -1,829 +0,0 @@ -""" -workflow: - Document - -> (InputEncoding, TargetEncoding) -> TaskEncoding -> TaskBatchEncoding - -> ModelBatchEncoding -> ModelBatchOutput - -> TaskOutput - -> Document -""" - -import logging -from collections import defaultdict -from copy import deepcopy -from typing import ( - Any, - Dict, 
- Iterable, - Iterator, - List, - Optional, - Sequence, - Set, - Tuple, - Type, - TypedDict, - Union, -) - -import pandas as pd -import torch -from pie_core import ( - Annotation, - AnnotationLayer, - Document, - TaskEncoding, - TaskModule, -) -from pytorch_ie.taskmodules.interface import ChangesTokenizerVocabSize -from tokenizers import AddedToken -from torch import BoolTensor, LongTensor, Tensor -from torch.nn.utils.rnn import pad_sequence -from torchmetrics import ClasswiseWrapper, F1Score, Metric, MetricCollection -from transformers import AutoTokenizer -from typing_extensions import TypeAlias - -from pie_modules.annotations import ( - BinaryRelation, - LabeledSpan, - MultiLabeledBinaryRelation, - NaryRelation, -) -from pie_modules.document.processing import ( - token_based_document_to_text_based, - tokenize_document, -) -from pie_modules.documents import ( - TextBasedDocument, - TextDocumentWithLabeledPartitions, - TextDocumentWithLabeledSpansAndBinaryRelations, - TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions, - TokenDocumentWithLabeledSpansAndBinaryRelations, - TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions, -) -from pie_modules.taskmodules.metrics import WrappedMetricWithPrepareFunction -from pie_modules.utils.span import distance as get_span_distance - -PAD_VALUES = { - "input_ids": 0, - "attention_mask": 0, - "span_start_indices": 0, - "span_end_indices": 0, - "tuple_indices": -1, - "labels": -100, - "tuple_indices_mask": False, -} -DTYPES = { - "input_ids": torch.long, - "attention_mask": torch.long, - "span_start_indices": torch.long, - "span_end_indices": torch.long, - "tuple_indices": torch.long, - "labels": torch.long, - "tuple_indices_mask": torch.bool, -} - - -class InputEncodingType(TypedDict, total=False): - # shape: (sequence_length,) - input_ids: LongTensor - # shape: (sequence_length,) - attention_mask: LongTensor - # shape: (num_entities,) - span_start_indices: LongTensor - # shape: (num_entities,) - 
span_end_indices: LongTensor - # list of lists of argument indices: [[head_idx, tail_idx], ...] - # NOTE: these indices point into span_start_indices and span_end_indices! - tuple_indices: LongTensor - tuple_indices_mask: BoolTensor - - -class TargetEncodingType(TypedDict, total=False): - # list of label indices: [label_idx, ...] - labels: LongTensor - - -DocumentType: TypeAlias = TextBasedDocument -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] - - -class TaskOutputType(TypedDict, total=False): - labels: Sequence[str] - probabilities: Sequence[float] - - -class ModelInputType(TypedDict, total=False): - input_ids: LongTensor - attention_mask: LongTensor - span_start_indices: LongTensor - span_end_indices: LongTensor - tuple_indices: LongTensor - tuple_indices_mask: BoolTensor - - -class ModelTargetType(TypedDict, total=False): - labels: LongTensor - probabilities: LongTensor - - -TaskModuleType: TypeAlias = TaskModule[ - # _InputEncoding, _TargetEncoding, _TaskBatchEncoding, _ModelBatchOutput, _TaskOutput - DocumentType, - InputEncodingType, - TargetEncodingType, - Tuple[ModelInputType, Optional[ModelTargetType]], - ModelTargetType, - TaskOutputType, -] - - -HEAD = "head" -TAIL = "tail" -START = "start" -END = "end" - - -logger = logging.getLogger(__name__) - - -def _get_label_ids_from_model_output( - model_output: ModelTargetType, -) -> LongTensor: - return model_output["labels"] - - -def get_relation_argument_spans_and_roles( - relation: Annotation, -) -> Tuple[Tuple[str, Annotation], ...]: - if isinstance(relation, BinaryRelation): - return (HEAD, relation.head), (TAIL, relation.tail) - elif isinstance(relation, NaryRelation): - # create unique order by sorting the arguments by their start and end positions and role - sorted_args = sorted( - zip(relation.roles, relation.arguments), - key=lambda role_and_span: ( - role_and_span[1].start, - role_and_span[1].end, - role_and_span[0], - ), - ) - return 
tuple(sorted_args) - else: - raise NotImplementedError( - f"the taskmodule does not yet support getting relation arguments for type: {type(relation)}" - ) - - -def construct_argument_marker(pos: str, label: Optional[str] = None, role: str = "SPAN") -> str: - if pos not in [START, END]: - raise ValueError(f"pos must be one of {START} or {END}, but got: {pos}") - start_or_end_marker = "" if pos == START else "/" - if label is not None: - return f"[{start_or_end_marker}{role}:{label}]" - else: - return f"[{start_or_end_marker}{role}]" - - -def inject_markers_into_text( - text: str, positions_and_markers: List[Tuple[int, str]] -) -> Tuple[str, Dict[int, int]]: - offset = 0 - original2new_pos = dict() - for original_pos, marker in sorted(positions_and_markers): - text = text[: original_pos + offset] + marker + text[original_pos + offset :] - offset += len(marker) - original2new_pos[original_pos] = original_pos + offset - return text, original2new_pos - - -def to_tensor(key: str, value: Any) -> Tensor: - return torch.tensor(value, dtype=DTYPES[key]) - - -def pad_or_stack(key: str, values: List[LongTensor]) -> Tensor: - if key in PAD_VALUES: - max_last_dim = None - if key == "tuple_indices": - max_last_dim = max(v.shape[-1] for v in values if len(v.shape) == 2) - values = [v.reshape(-1) for v in values] - result = pad_sequence(values, batch_first=True, padding_value=PAD_VALUES[key]) - if key == "tuple_indices": - batch_size = len(values) - result = result.reshape(batch_size, -1, max_last_dim) - else: - result = torch.stack(values, dim=0) - return result - - -@TaskModule.register() -class RESpanPairClassificationTaskModule(TaskModuleType, ChangesTokenizerVocabSize): - """Task module for relation extraction as span pair classification. - - This task module frames relation extraction as a span pair classification task where all candidate - pairs in a given text are classified at once. The task module injects start and end markers for - each entity (i.e. 
"[SPAN]" and "[/SPAN]") into the text and tokenizes the text (the markers are - handled as special tokens, and thus, kept as they are). It then collects the start- and end-marker - positions for each entity and constructs a model input encoding from the tokenized text and these - positions. The model target encoding consists of a list of label indices and a list of tuples - (head and tail) of argument indices that point into the start- and end-marker positions from the - model inputs. The model output is expected to be of the same format as the model target encoding, - but with probabilities for each label. - - This means, that the model should return only positive relations (argument indices + label) and - discard all negative ones. - - Args: - tokenizer_name_or_path: The name or path of the tokenizer to use. - relation_annotation: The name of the annotation layer that contains the binary relations. - partition_annotation: The name of the annotation layer that contains the labeled partitions. - If provided, the task module expects the document to have a partition layer with the - given name containing LabeledSpans. These entries are used to split the text into - partitions, e.g. paragraphs or sentences, that are treated as separate documents during - tokenization. Defaults to None. - tokenize_kwargs: Additional keyword arguments passed to the tokenizer during tokenization. - create_candidate_relations: Whether to create candidate relations for training. If True, the - task module creates all possible pairs of entities in the text as candidate relations. - Defaults to False. - create_candidate_relations_kwargs: Additional keyword arguments passed to the method that - creates the candidate relations (e.g. max_argument_distance). Defaults to None. - labels: The list of relation labels. If not provided, the task module will collect the labels - from the documents during preparation. Defaults to None. - entity_labels: The list of entity labels. 
If not provided, the task module will collect the - entity labels from the documents during preparation. Defaults to None. - add_type_to_marker: Whether to add the entity type to the markers. If True, the markers will - look like this: "[SPAN:entity_type]" and "[/SPAN:entity_type]" where entity_type is the - type of the respective entity. Defaults to False. - log_first_n_examples: The number of examples to log during training. If 0, no examples are logged. - Defaults to 0. - collect_statistics: Whether to collect statistics during preparation. If True, the task module - will collect statistics about the available, used, and skipped relations. Defaults to False. - """ - - PREPARED_ATTRIBUTES = ["labels", "entity_labels"] - - def __init__( - self, - tokenizer_name_or_path: str, - relation_annotation: str = "binary_relations", - no_relation_label: str = "no_relation", - partition_annotation: Optional[str] = None, - tokenize_kwargs: Optional[Dict[str, Any]] = None, - create_candidate_relations: bool = False, - create_candidate_relations_kwargs: Optional[Dict[str, Any]] = None, - labels: Optional[List[str]] = None, - entity_labels: Optional[List[str]] = None, - add_type_to_marker: bool = True, - log_first_n_examples: int = 0, - collect_statistics: bool = False, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.save_hyperparameters() - - self.relation_annotation = relation_annotation - self.no_relation_label = no_relation_label - self.tokenize_kwargs = tokenize_kwargs or {} - self.create_candidate_relations = create_candidate_relations - self.create_candidate_relations_kwargs = create_candidate_relations_kwargs or {} - self.labels = labels - self.add_type_to_marker = add_type_to_marker - self.entity_labels = entity_labels - self.partition_annotation = partition_annotation - # overwrite None with 0 for backward compatibility - self.log_first_n_examples = log_first_n_examples or 0 - self.collect_statistics = collect_statistics - - self.tokenizer = 
AutoTokenizer.from_pretrained(tokenizer_name_or_path) - - self.argument_markers = None - - self._logged_examples_counter = 0 - - self.reset_statistics() - - @property - def document_type(self) -> Optional[Type[DocumentType]]: - if self.partition_annotation is not None: - dt = TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions - else: - dt = TextDocumentWithLabeledSpansAndBinaryRelations - if self.relation_annotation == "binary_relations": - return dt - else: - logger.warning( - f"relation_annotation={self.relation_annotation} is " - f"not the default value ('binary_relations'), so the taskmodule {type(self).__name__} can not request " - f"the usual document type for auto-conversion ({dt.__name__}) because this has the bespoken default " - f"value as layer name instead of the provided one." - ) - return None - - @property - def tokenized_document_type(self) -> Type[TokenDocumentWithLabeledSpansAndBinaryRelations]: - if self.partition_annotation is None: - return TokenDocumentWithLabeledSpansAndBinaryRelations - else: - return TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions - - @property - def normalized_document_type(self) -> Type[TextDocumentWithLabeledSpansAndBinaryRelations]: - if self.partition_annotation is None: - return TextDocumentWithLabeledSpansAndBinaryRelations - else: - return TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions - - def normalize_document(self, document) -> TextDocumentWithLabeledSpansAndBinaryRelations: - span_layer_name = self.get_span_layer_name(document) - field_mapping = { - span_layer_name: "labeled_spans", - self.relation_annotation: "binary_relations", - } - if self.partition_annotation is not None: - field_mapping[self.partition_annotation] = "labeled_partitions" - casted_document = document.as_type( - self.normalized_document_type, field_mapping=field_mapping - ) - return casted_document - - def get_relation_layer(self, document: Document) -> AnnotationLayer[BinaryRelation]: - return 
document[self.relation_annotation] - - def get_span_layer_name(self, document: Document) -> str: - return document[self.relation_annotation].target_name - - def get_entity_layer(self, document: Document) -> AnnotationLayer[LabeledSpan]: - relations: AnnotationLayer[BinaryRelation] = self.get_relation_layer(document) - return relations.target_layer - - def _prepare(self, documents: Sequence[DocumentType]) -> None: - entity_labels: Set[str] = set() - relation_labels: Set[str] = set() - for document in documents: - relations: AnnotationLayer[BinaryRelation] = self.get_relation_layer(document) - entities: AnnotationLayer[LabeledSpan] = self.get_entity_layer(document) - - for entity in entities: - entity_labels.add(entity.label) - - for relation in relations: - relation_labels.add(relation.label) - - if self.no_relation_label in relation_labels: - relation_labels.remove(self.no_relation_label) - - self.labels = sorted(relation_labels) - self.entity_labels = sorted(entity_labels) - - def reset_statistics(self): - self._statistics = defaultdict(int) - self._collected_relations: Dict[str, List[Annotation]] = defaultdict(list) - - def collect_relation(self, kind: str, relation: Annotation): - if self.collect_statistics: - self._collected_relations[kind].append(relation) - - def collect_all_relations(self, kind: str, relations: Iterable[Annotation]): - if self.collect_statistics: - self._collected_relations[kind].extend(relations) - - def finalize_statistics(self): - if self.collect_statistics: - all_relations = set(self._collected_relations["available_tokenized"]) - used_relations = set(self._collected_relations["used"]) - skipped_other = all_relations - used_relations - for key, rels in self._collected_relations.items(): - rels_set = set(rels) - if key.startswith("skipped_"): - skipped_other -= rels_set - elif key.startswith("used_"): - pass - elif key in ["available", "available_tokenized", "used"]: - pass - else: - raise ValueError(f"unknown key: {key}") - for rel in 
rels_set: - self.increase_counter(key=(key, rel.label)) - for rel in skipped_other: - self.increase_counter(key=("skipped_other", rel.label)) - - def show_statistics(self): - if self.collect_statistics: - self.finalize_statistics() - - to_show = pd.Series(self._statistics) - if len(to_show.index.names) > 1: - to_show = to_show.unstack() - logger.info(f"statistics:\n{to_show.to_markdown()}") - - def increase_counter(self, key: Tuple[Any, ...], value: Optional[int] = 1): - if self.collect_statistics: - key_str = tuple(str(k) for k in key) - self._statistics[key_str] += value - - def encode(self, *args, **kwargs): - self.reset_statistics() - res = super().encode(*args, **kwargs) - self.show_statistics() - return res - - def collect_argument_markers(self, entity_labels: Iterable[str]) -> List[str]: - argument_markers: Set[str] = set() - for arg_pos in [START, END]: - if self.add_type_to_marker: - for entity_label in entity_labels: - argument_markers.add( - construct_argument_marker(pos=arg_pos, label=entity_label) - ) - else: - argument_markers.add(construct_argument_marker(pos=arg_pos)) - - return sorted(list(argument_markers)) - - def _post_prepare(self): - self.label_to_id = {label: i + 1 for i, label in enumerate(self.labels)} - self.label_to_id[self.no_relation_label] = 0 - self.id_to_label = {v: k for k, v in self.label_to_id.items()} - - self.argument_markers = self.collect_argument_markers(self.entity_labels) - num_added = self.tokenizer.add_special_tokens( - {"additional_special_tokens": self.argument_markers} - ) - if len(self.argument_markers) != num_added: - logger.warning( - f"expected to add {len(self.argument_markers)} argument markers, but added {num_added}. It seems " - f"that the tokenizer already contains some of the argument markers." 
- ) - - self.argument_markers_to_id = { - marker: self.tokenizer.vocab[marker] for marker in self.argument_markers - } - - def _create_candidate_relations( - self, - document: TokenDocumentWithLabeledSpansAndBinaryRelations, - max_argument_distance: Optional[int] = None, - argument_distance_type: str = "inner", - ) -> Sequence[Annotation]: - # TODO: ensure that the relation layer type is BinaryRelation! - labeled_spans = document.labeled_spans - candidate_relations = [] - for i, head in enumerate(labeled_spans): - for j, tail in enumerate(labeled_spans): - if i == j: - continue - rel = BinaryRelation(head=head, tail=tail, label=self.no_relation_label) - if max_argument_distance is not None: - arg_distance = get_span_distance( - start_end=(head.start, head.end), - other_start_end=(tail.start, tail.end), - distance_type=argument_distance_type, - ) - if arg_distance > max_argument_distance: - self.collect_relation("skipped_argument_distance", rel) - continue - candidate_relations.append(rel) - return candidate_relations - - def inject_markers_for_labeled_spans( - self, - document: TextDocumentWithLabeledSpansAndBinaryRelations, - ) -> Tuple[TextDocumentWithLabeledSpansAndBinaryRelations, Dict[LabeledSpan, LabeledSpan]]: - # collect markers and injection positions - positions_and_markers = [] - for labeled_span in document.labeled_spans: - label_or_none = labeled_span.label if self.add_type_to_marker else None - start_marker = construct_argument_marker(pos=START, label=label_or_none) - positions_and_markers.append((labeled_span.start, start_marker)) - end_marker = construct_argument_marker(pos=END, label=label_or_none) - positions_and_markers.append((labeled_span.end, end_marker)) - - if isinstance(document, TextDocumentWithLabeledPartitions): - # create "dummy" markers for the partitions so that entries for these positions are created - # in original2new_pos - for labeled_partition in document.labeled_partitions: - 
positions_and_markers.append((labeled_partition.start, "")) - positions_and_markers.append((labeled_partition.end, "")) - - # inject markers into the text - marked_text, original2new_pos = inject_markers_into_text( - document.text, positions_and_markers - ) - - # construct new spans - old2new_spans = dict() - for labeled_span in document.labeled_spans: - start = original2new_pos[labeled_span.start] - end = original2new_pos[labeled_span.end] - new_span = LabeledSpan(start=start, end=end, label=labeled_span.label) - old2new_spans[labeled_span] = new_span - - # construct new relations - old2new_relations = dict() - for relation in document.binary_relations: - if isinstance(relation, BinaryRelation): - head = old2new_spans[relation.head] - tail = old2new_spans[relation.tail] - new_relation = BinaryRelation(head=head, tail=tail, label=relation.label) - else: - raise NotImplementedError( - f"the taskmodule does not yet support relations of type {type(relation)}" - ) - old2new_relations[relation] = new_relation - - # construct new document - new_document = type(document)( - id=document.id, - metadata=deepcopy(document.metadata), - text=marked_text, - ) - new_document.labeled_spans.extend(old2new_spans.values()) - new_document.binary_relations.extend(old2new_relations.values()) - if isinstance(document, TextDocumentWithLabeledPartitions): - for labeled_partition in document.labeled_partitions: - new_start = original2new_pos[labeled_partition.start] - new_end = original2new_pos[labeled_partition.end] - new_labeled_partitions = labeled_partition.copy(start=new_start, end=new_end) - new_document.labeled_partitions.append(new_labeled_partitions) - - new2old_spans = {new_span: old_span for old_span, new_span in old2new_spans.items()} - return new_document, new2old_spans - - def encode_input( - self, - document: DocumentType, - is_training: bool = False, - ) -> Optional[Union[TaskEncodingType, Sequence[TaskEncodingType]]]: - self.collect_all_relations("available", 
self.get_relation_layer(document)) - - # 1. inject start and end markers for each entity into the text - # - save mapping from new entities to original entities - # 2. tokenize the text - # - add the marker tokens to the tokenizer as special tokens - # - tokenize with tokenize_document() - # 3. get start- and end-token positions for each entity - # 4. construct task encoding from tokenized text and entity positions - - normalized_document = self.normalize_document(document) - document_with_markers, injected2original_spans = self.inject_markers_for_labeled_spans( - normalized_document - ) - all_added_annotations: List[Dict[str, Dict[Annotation, Annotation]]] = [] - tokenized_docs = tokenize_document( - document_with_markers, - tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - partition_layer=( - "labeled_partitions" if self.partition_annotation is not None else None - ), - added_annotations=all_added_annotations, - strict_span_conversion=False, - **self.tokenize_kwargs, - ) - - task_encodings: List[TaskEncodingType] = [] - for tokenized_doc, tokenized_annotations in zip(tokenized_docs, all_added_annotations): - self.collect_all_relations("available_tokenized", tokenized_doc.binary_relations) - # collect start- and end-token positions for each entity - span_start_indices = [] - span_end_indices = [] - for labeled_span in tokenized_doc.labeled_spans: - # the start marker is one token before the start of the span - span_start_indices.append(labeled_span.start - 1) - # the end marker is one token after the end of the span, but the end index is exclusive - span_end_indices.append(labeled_span.end) - - labeled_span2idx = {span: idx for idx, span in enumerate(tokenized_doc.labeled_spans)} - tuple_indices = [] # list of lists of argument indices: [[head_idx, tail_idx], ...] 
- if self.create_candidate_relations: - candidate_relations = self._create_candidate_relations( - tokenized_doc, **self.create_candidate_relations_kwargs - ) - else: - candidate_relations = tokenized_doc.binary_relations - - # if there are no candidate relations, skip the whole (tokenized) document - if len(candidate_relations) == 0: - continue - - for relation in candidate_relations: - current_args_indices = [] - for _, arg_span in get_relation_argument_spans_and_roles(relation): - arg_idx = labeled_span2idx[arg_span] - current_args_indices.append(arg_idx) - tuple_indices.append(current_args_indices) - - encoding = tokenized_doc.metadata["tokenizer_encoding"] - inputs = { - "input_ids": encoding.ids, - "attention_mask": encoding.attention_mask, - "span_start_indices": span_start_indices, - "span_end_indices": span_end_indices, - "tuple_indices": tuple_indices, - "tuple_indices_mask": [True] * len(tuple_indices), - } - inputs_tensors = {k: to_tensor(k, v) for k, v in inputs.items()} - task_encodings.append( - TaskEncoding( - document=document, - inputs=inputs_tensors, - metadata={ - "tokenized_document": tokenized_doc, - "injected2original_spans": injected2original_spans, - "candidate_relations": candidate_relations, - "tokenized_annotations": tokenized_annotations, - }, - ) - ) - - return task_encodings - - def encode_target( - self, - task_encoding: TaskEncodingType, - ) -> TargetEncodingType: - gold_relations = task_encoding.metadata["tokenized_document"].binary_relations - gold_roles_and_args2relation = defaultdict(list) - for relation in gold_relations: - # If we manually set the labels, we only consider relations with a label in the label_to_id mapping - # This allows us to ignore relations with certain labels during training. 
- if relation.label in self.label_to_id: - gold_roles_and_args2relation[ - get_relation_argument_spans_and_roles(relation) - ].append(relation) - label_indices = [] # list of label indices - candidate_relations = [] - for candidate_relation in task_encoding.metadata["candidate_relations"]: - candidate_roles_and_args = get_relation_argument_spans_and_roles(candidate_relation) - gold_relations = gold_roles_and_args2relation.get(candidate_roles_and_args, []) - if len(gold_relations) == 0: - label_idx = self.label_to_id[candidate_relation.label] - self.collect_relation("used", candidate_relation) - elif len(gold_relations) == 1: - label_idx = self.label_to_id[gold_relations[0].label] - self.collect_relation("used", gold_relations[0]) - else: - # TODO: or should we add all gold relations with the same arguments? - logger.warning( - f"skip the candidate relation because there are more than one gold relation " - f"for its args and roles: {gold_relations}" - ) - for gold_relation in gold_relations: - self.collect_relation("skipped_same_arguments", gold_relation) - label_idx = PAD_VALUES["labels"] - - label_indices.append(label_idx) - candidate_relations.append(candidate_relation) - - task_encoding.metadata["candidate_relations"] = candidate_relations - target: TargetEncodingType = {"labels": to_tensor("labels", label_indices)} - - self._maybe_log_example(task_encoding=task_encoding, target=target) - - return target - - def _maybe_log_example( - self, - task_encoding: TaskEncodingType, - target: TargetEncodingType, - ): - """Maybe log the example.""" - - # log the first n examples - if self._logged_examples_counter < self.log_first_n_examples: - input_ids = task_encoding.inputs["input_ids"] - tokens = self.tokenizer.convert_ids_to_tokens(input_ids) - logger.info("*** Example ***") - logger.info(f"doc id: {task_encoding.document.id}") - logger.info(f"tokens: {' '.join([x for x in tokens])}") - logger.info(f"input_ids: {' '.join([str(x) for x in input_ids.tolist()])}") - # 
target data - span_start_indices = task_encoding.inputs["span_start_indices"] - span_end_indices = task_encoding.inputs["span_end_indices"] - labels = [self.id_to_label[label] for label in target["labels"].tolist()] - for i, (label, tuple_indices) in enumerate( - zip(labels, task_encoding.inputs["tuple_indices"]) - ): - logger.info(f"relation {i}: {label}") - for j, arg_idx in enumerate(tuple_indices): - arg_tokens = tokens[span_start_indices[arg_idx] : span_end_indices[arg_idx]] - logger.info(f"\targ {j}: {' '.join([str(x) for x in arg_tokens])}") - - self._logged_examples_counter += 1 - - def collate( - self, task_encodings: Sequence[TaskEncodingType] - ) -> Tuple[ModelInputType, Optional[ModelTargetType]]: - input_keys = task_encodings[0].inputs.keys() - inputs: ModelInputType = { # type: ignore - key: pad_or_stack(key, [task_encoding.inputs[key] for task_encoding in task_encodings]) - for key in input_keys - } - - targets: Optional[ModelTargetType] = None - if task_encodings[0].has_targets: - target_keys = task_encodings[0].targets.keys() - targets: ModelTargetType = { # type: ignore - key: pad_or_stack( - key, [task_encoding.targets[key] for task_encoding in task_encodings] - ) - for key in target_keys - } - - return inputs, targets - - def unbatch_output(self, model_output: ModelTargetType) -> Sequence[TaskOutputType]: - # shape: (batch_size, num_candidates) - label_ids = model_output["labels"].detach().cpu().tolist() - # shape: (batch_size, num_candidates, num_labels) - all_probabilities = model_output["probabilities"].detach().cpu().tolist() - unbatched_output = [] - for batch_idx in range(len(label_ids)): - labels = [] - probabilities = [] - for label_id, probs in zip(label_ids[batch_idx], all_probabilities[batch_idx]): - labels.append(self.id_to_label[label_id]) - probabilities.append(probs[label_id]) - entry: TaskOutputType = { - "labels": labels, - "probabilities": probabilities, - } - unbatched_output.append(entry) - - return unbatched_output - - def 
decode_annotations( - self, - task_output: TaskOutputType, - task_encoding: TaskEncodingType, - ) -> Dict[str, List[Annotation]]: - char2token_spans = task_encoding.metadata["tokenized_annotations"]["labeled_spans"] - token2char_spans = {v: k for k, v in char2token_spans.items()} - injected2original_spans = task_encoding.metadata["injected2original_spans"] - new_relations = [] - for candidate_relation, label, probability, is_valid in zip( - task_encoding.metadata["candidate_relations"], - task_output["labels"], - task_output["probabilities"], - task_encoding.inputs["tuple_indices_mask"], - ): - # exclude - # - padding entries (is_valid=False) - # - negative relations (if we have added them) - if is_valid and ( - label != self.no_relation_label or not self.create_candidate_relations - ): - token_head, token_tail = candidate_relation.head, candidate_relation.tail - char_head = token2char_spans[token_head] - char_tail = token2char_spans[token_tail] - original_head = injected2original_spans[char_head] - original_tail = injected2original_spans[char_tail] - new_annotation = candidate_relation.copy( - head=original_head, tail=original_tail, label=label, score=probability - ) - new_relations.append(new_annotation) - - return {"binary_relations": new_relations} - - def create_annotations_from_output( - self, - task_encoding: TaskEncodingType, - task_output: TaskOutputType, - ) -> Iterator[Tuple[str, Union[BinaryRelation, MultiLabeledBinaryRelation, NaryRelation]]]: - decoded_annotations = self.decode_annotations( - task_output=task_output, task_encoding=task_encoding - ) - - for relation in decoded_annotations["binary_relations"]: - yield self.relation_annotation, relation - - def configure_model_metric(self, stage: str) -> Metric: - if self.label_to_id is None: - raise ValueError( - "The taskmodule has not been prepared yet, so label_to_id is not known. 
" - "Please call taskmodule.prepare(documents) before configuring the model metric " - "or pass the labels to the taskmodule constructor an call taskmodule.post_prepare()." - ) - labels = [self.id_to_label[i] for i in range(len(self.label_to_id))] - common_metric_kwargs = { - "num_classes": len(labels), - "task": "multiclass", - "ignore_index": PAD_VALUES["labels"], - } - return WrappedMetricWithPrepareFunction( - metric=MetricCollection( - { - "micro/f1": F1Score(average="micro", **common_metric_kwargs), - "macro/f1": F1Score(average="macro", **common_metric_kwargs), - "f1_per_label": ClasswiseWrapper( - F1Score(average=None, **common_metric_kwargs), - labels=labels, - postfix="/f1", - ), - } - ), - prepare_function=_get_label_ids_from_model_output, - ) diff --git a/src/pie_modules/taskmodules/re_text_classification_with_indices.py b/src/pie_modules/taskmodules/re_text_classification_with_indices.py deleted file mode 100644 index 61fa41f25..000000000 --- a/src/pie_modules/taskmodules/re_text_classification_with_indices.py +++ /dev/null @@ -1,1508 +0,0 @@ -""" -workflow: - Document - -> (InputEncoding, TargetEncoding) -> TaskEncoding -> TaskBatchEncoding - -> ModelBatchEncoding -> ModelBatchOutput - -> TaskOutput - -> Document -""" - -import logging -from collections import defaultdict -from functools import partial -from typing import ( - Any, - Dict, - Iterable, - Iterator, - List, - Optional, - Sequence, - Set, - Tuple, - Type, - TypedDict, - Union, -) - -import numpy as np -import torch -from pie_core import ( - Annotation, - AnnotationLayer, - Document, - TaskEncoding, - TaskModule, -) -from pytorch_ie.taskmodules.interface import ChangesTokenizerVocabSize -from pytorch_ie.utils.window import get_window_around_slice -from torch import LongTensor -from torchmetrics import ClasswiseWrapper, F1Score, MetricCollection -from transformers import AutoTokenizer -from transformers.file_utils import PaddingStrategy -from transformers.tokenization_utils_base import 
TruncationStrategy -from typing_extensions import TypeAlias, TypeVar - -from pie_modules.annotations import ( - BinaryRelation, - LabeledSpan, - MultiLabeledBinaryRelation, - NaryRelation, - Span, -) -from pie_modules.documents import ( - TextBasedDocument, - TextDocumentWithLabeledSpansAndBinaryRelations, - TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions, -) -from pie_modules.models.simple_sequence_classification import ( - InputType as ModelInputType, -) -from pie_modules.models.simple_sequence_classification import ( - TargetType as ModelTargetType, -) -from pie_modules.taskmodules.common.mixins import RelationStatisticsMixin -from pie_modules.taskmodules.metrics import WrappedMetricWithPrepareFunction -from pie_modules.utils.span import distance as span_distance -from pie_modules.utils.span import is_contained_in -from pie_modules.utils.tokenization import ( - SpanNotAlignedWithTokenException, - get_aligned_token_span, -) - -InputEncodingType: TypeAlias = Dict[str, Any] -TargetEncodingType: TypeAlias = Sequence[int] -DocumentType: TypeAlias = TextBasedDocument - -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] - - -class TaskOutputType(TypedDict, total=False): - labels: Sequence[str] - probabilities: Sequence[float] - - -TaskModuleType: TypeAlias = TaskModule[ - # _InputEncoding, _TargetEncoding, _TaskBatchEncoding, _ModelBatchOutput, _TaskOutput - DocumentType, - InputEncodingType, - TargetEncodingType, - Tuple[ModelInputType, Optional[ModelTargetType]], - ModelTargetType, - TaskOutputType, -] - - -HEAD = "head" -TAIL = "tail" -START = "start" -END = "end" - - -logger = logging.getLogger(__name__) - - -def _get_labels(model_output: ModelTargetType) -> LongTensor: - return model_output["labels"] - - -def _get_labels_together_remove_none_label( - predictions: ModelTargetType, targets: ModelTargetType, none_idx: int -) -> Tuple[LongTensor, LongTensor]: - mask_not_both_none = 
(predictions["labels"] != none_idx) | (targets["labels"] != none_idx) - predictions_not_none = predictions["labels"][mask_not_both_none] - targets_not_none = targets["labels"][mask_not_both_none] - return predictions_not_none, targets_not_none - - -def find_sublist(sub: List, bigger: List) -> int: - if not bigger: - return -1 - if not sub: - return 0 - first, rest = sub[0], sub[1:] - pos = 0 - try: - while True: - pos = bigger.index(first, pos) + 1 - if not rest or bigger[pos : pos + len(rest)] == rest: - return pos - 1 - except ValueError: - return -1 - - -class MarkerFactory: - def __init__(self, role_to_marker: Dict[str, str]): - self.role_to_marker = role_to_marker - - def _get_role_marker(self, role: str) -> str: - return self.role_to_marker[role] - - def _get_marker(self, role: str, is_start: bool, label: Optional[str] = None) -> str: - result = "[" - if not is_start: - result += "/" - result += self._get_role_marker(role) - if label is not None: - result += f":{label}" - result += "]" - return result - - def get_start_marker(self, role: str, label: Optional[str] = None) -> str: - return self._get_marker(role=role, is_start=True, label=label) - - def get_end_marker(self, role: str, label: Optional[str] = None) -> str: - return self._get_marker(role=role, is_start=False, label=label) - - def get_append_marker(self, role: str, label: Optional[str] = None) -> str: - role_marker = self._get_role_marker(role) - if label is None: - return f"[{role_marker}]" - else: - return f"[{role_marker}={label}]" - - @property - def all_roles(self) -> Set[str]: - return set(self.role_to_marker) - - def get_all_markers( - self, - entity_labels: List[str], - append_markers: bool = False, - add_type_to_marker: bool = False, - ) -> List[str]: - result: Set[str] = set() - if add_type_to_marker: - none_and_labels = [None] + entity_labels - else: - none_and_labels = [None] - for role in self.all_roles: - # create start and end markers without label and for all labels, if 
add_type_to_marker - for maybe_label in none_and_labels: - result.add(self.get_start_marker(role=role, label=maybe_label)) - result.add(self.get_end_marker(role=role, label=maybe_label)) - # create append markers for all labels - if append_markers: - for entity_label in entity_labels: - result.add(self.get_append_marker(role=role, label=entity_label)) - - # sort and convert to list - return sorted(result) - - -class RelationArgument: - def __init__( - self, - entity: LabeledSpan, - role: str, - token_span: Span, - add_type_to_marker: bool, - marker_factory: MarkerFactory, - ) -> None: - self.marker_factory = marker_factory - if role not in self.marker_factory.all_roles: - raise ValueError( - f"role='{role}' not in known roles={sorted(self.marker_factory.all_roles)} (did you " - f"initialise the taskmodule with the correct argument_role_to_marker dictionary?)" - ) - - self.entity = entity - - self.role = role - self.token_span = token_span - self.add_type_to_marker = add_type_to_marker - - @property - def maybe_label(self) -> Optional[str]: - return self.entity.label if self.add_type_to_marker else None - - @property - def as_start_marker(self) -> str: - return self.marker_factory.get_start_marker(role=self.role, label=self.maybe_label) - - @property - def as_end_marker(self) -> str: - return self.marker_factory.get_end_marker(role=self.role, label=self.maybe_label) - - @property - def as_append_marker(self) -> str: - # Note: we add the label in either case (we use self.entity.label instead of self.label) - return self.marker_factory.get_append_marker(role=self.role, label=self.entity.label) - - def shift_token_span(self, value: int): - self.token_span = Span( - start=self.token_span.start + value, end=self.token_span.end + value - ) - - -def get_relation_argument_spans_and_roles( - relation: Annotation, -) -> Tuple[Tuple[str, Annotation], ...]: - if isinstance(relation, BinaryRelation): - return (HEAD, relation.head), (TAIL, relation.tail) - elif 
isinstance(relation, NaryRelation): - # create unique order by sorting the arguments by their start and end positions and role - sorted_args = sorted( - zip(relation.roles, relation.arguments), - key=lambda role_and_span: ( - role_and_span[1].start, - role_and_span[1].end, - role_and_span[0], - ), - ) - return tuple(sorted_args) - else: - raise NotImplementedError( - f"the taskmodule does not yet support getting relation arguments for type: {type(relation)}" - ) - - -def construct_mask(input_ids: torch.LongTensor, positive_ids: List[Any]) -> torch.LongTensor: - """Construct a mask for the input_ids where all entries in mask_ids are 1.""" - masks = [torch.nonzero(input_ids == marker_token_id) for marker_token_id in positive_ids] - globs = torch.cat(masks) - value = torch.ones(globs.shape[0], dtype=int) - mask = torch.zeros(input_ids.shape, dtype=int) - mask.index_put_(tuple(globs.t()), value) - return mask - - -S = TypeVar("S", bound=Span) - - -def shift_span(span: S, offset: int) -> S: - return span.copy(start=span.start + offset, end=span.end + offset) - - -def bio_encode_spans( - spans: List[Tuple[int, int, str]], total_length: int, label2idx: Dict[str, int] -) -> List[int]: - # result = ["O"] * total_length - result = [0] * total_length - for start, end, label in spans: - # result[start] = f"B-{label}" - result[start] = label2idx[label] * 2 + 1 - for i in range(start + 1, end): - # result[i] = f"I-{label}" - result[i] = label2idx[label] * 2 + 2 - return result - - -@TaskModule.register() -class RETextClassificationWithIndicesTaskModule( - RelationStatisticsMixin, - TaskModuleType, - ChangesTokenizerVocabSize, -): - """Marker based relation extraction. This taskmodule prepares the input token ids in such a way - that before and after the candidate head and tail entities special marker tokens are inserted. - Then, the modified token ids can be simply passed into a transformer based text classifier - model. - - parameters: - - partition_annotation: str, optional. 
If specified, LabeledSpan annotations with this name are - expected to define partitions of the document that will be processed individually, e.g. sentences - or sections of the document text. - none_label: str, defaults to "no_relation". The relation label that indicate dummy/negative relations. - Predicted relations with that label will not be added to the document(s). - max_window: int, optional. If specified, use the tokens in a window of maximal this amount of tokens - around the center of head and tail entities and pass only that into the transformer. - create_relation_candidates: bool, defaults to False. If True, create relation candidates by pairwise - combining all entities in the document and assigning the none_label. If the document already contains - a relation with the entity pair, we do not add it again. If False, assume that the document already - contains relation annotations including negative examples (i.e. relations with the none_label). - handle_relations_with_same_arguments: str, defaults to "keep_none". If "keep_none", all relations that - share same arguments will be removed. If "keep_first", first occurred duplicate will be kept. - argument_type_whitelist: List[List[str]], optional, defaults to None. If set, only relations (candidates) - with given argument type tuples are created from document and by by `create_relation_candidates`. - This affects only model input. - argument_and_relation_type_whitelist: Union[Dict[str, List[List[str]]], List[List[str]]], optional, - defaults None. If set, only given relation types with given argument types will persist in - documents and generated by `create_relation_candidates`. This also affects predictions on - `decode()`, so it strictly filters both model input and output. Can also be passed as a list - of lists, where the first element is the relation type and the rest are the argument types. 
- """ - - PREPARED_ATTRIBUTES = ["labels", "entity_labels"] - - def __init__( - self, - tokenizer_name_or_path: str, - relation_annotation: str = "binary_relations", - add_candidate_relations: bool = False, - add_reversed_relations: bool = False, - partition_annotation: Optional[str] = None, - none_label: str = "no_relation", - padding: Union[bool, str, PaddingStrategy] = True, - truncation: Union[bool, str, TruncationStrategy] = True, - max_length: Optional[int] = None, - pad_to_multiple_of: Optional[int] = None, - multi_label: bool = False, - labels: Optional[List[str]] = None, - label_to_id: Optional[Dict[str, int]] = None, - add_type_to_marker: bool = False, - argument_role_to_marker: Optional[Dict[str, str]] = None, - single_argument_pair: bool = True, - append_markers: bool = False, - insert_markers: bool = True, - entity_labels: Optional[List[str]] = None, - reversed_relation_label_suffix: str = "_reversed", - symmetric_relations: Optional[List[str]] = None, - reverse_symmetric_relations: bool = True, - max_argument_distance: Optional[int] = None, - max_argument_distance_type: str = "inner", - max_argument_distance_tokens: Optional[int] = None, - max_argument_distance_type_tokens: str = "inner", - max_window: Optional[int] = None, - allow_discontinuous_text: bool = False, - log_first_n_examples: int = 0, - add_argument_indices_to_input: bool = False, - add_argument_tags_to_input: bool = False, - add_entity_tags_to_input: bool = False, - add_global_attention_mask_to_input: bool = False, - argument_type_whitelist: Optional[List[List[str]]] = None, - handle_relations_with_same_arguments: str = "keep_none", - argument_and_relation_type_whitelist: Optional[ - Union[Dict[str, List[List[str]]], List[List[str]]] - ] = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - if label_to_id is not None: - logger.warning( - "The parameter label_to_id is deprecated and will be removed in a future version. " - "Please use labels instead." 
- ) - id_to_label = {v: k for k, v in label_to_id.items()} - # reconstruct labels from label_to_id. Note that we need to remove the none_label - labels = [ - id_to_label[i] for i in range(len(id_to_label)) if id_to_label[i] != none_label - ] - self.save_hyperparameters(ignore=["label_to_id"]) - - self.relation_annotation = relation_annotation - self.add_candidate_relations = add_candidate_relations - self.add_reversed_relations = add_reversed_relations - self.padding = padding - self.truncation = truncation - self.labels = labels - self.max_length = max_length - self.pad_to_multiple_of = pad_to_multiple_of - self.multi_label = multi_label - self.add_type_to_marker = add_type_to_marker - self.single_argument_pair = single_argument_pair - self.append_markers = append_markers - self.insert_markers = insert_markers - self.entity_labels = entity_labels - self.partition_annotation = partition_annotation - self.none_label = none_label - self.reversed_relation_label_suffix = reversed_relation_label_suffix - self.symmetric_relations = set(symmetric_relations or []) - self.reverse_symmetric_relations = reverse_symmetric_relations - self.max_argument_distance = max_argument_distance - self.max_argument_distance_type = max_argument_distance_type - self.max_argument_distance_tokens = max_argument_distance_tokens - self.max_argument_distance_type_tokens = max_argument_distance_type_tokens - self.max_window = max_window - self.allow_discontinuous_text = allow_discontinuous_text - self.handle_relations_with_same_arguments = handle_relations_with_same_arguments - self.argument_type_whitelist: Optional[Set[Tuple[str, ...]]] = None - self.argument_and_relation_type_whitelist: Optional[Dict[str, Set[Tuple[str, ...]]]] = None - - if argument_type_whitelist is not None: - # hydra does not support tuples, so we got lists and need to convert them - self.argument_type_whitelist = {tuple(types) for types in argument_type_whitelist} - if argument_and_relation_type_whitelist is not None: - # 
hydra does not support tuples, so we got lists and need to convert them - if isinstance(argument_and_relation_type_whitelist, list): - self.argument_and_relation_type_whitelist = defaultdict(set) - for types_list in argument_and_relation_type_whitelist: - if len(types_list) < 1: - raise ValueError( - "argument_and_relation_type_whitelist must be a list of lists with at least one element" - ) - self.argument_and_relation_type_whitelist[types_list[0]].add( - tuple(types_list[1:]) - ) - else: - self.argument_and_relation_type_whitelist = { - rel: {tuple(types) for types in types_list} - for rel, types_list in argument_and_relation_type_whitelist.items() - } - # overwrite None with 0 for backward compatibility - self.log_first_n_examples = log_first_n_examples or 0 - self.add_argument_indices_to_input = add_argument_indices_to_input - self.add_argument_tags_to_input = add_argument_tags_to_input - self.add_entity_tags_to_input = add_entity_tags_to_input - self.add_global_attention_mask_to_input = add_global_attention_mask_to_input - if argument_role_to_marker is None: - self.argument_role_to_marker = {HEAD: "H", TAIL: "T"} - else: - self.argument_role_to_marker = argument_role_to_marker - - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) - - # used when allow_discontinuous_text - self.glue_token_ids = self._get_glue_token_ids() - - self.argument_markers = None - - self._logged_examples_counter = 0 - - def _get_glue_token_ids(self): - dummy_ids = self.tokenizer.build_inputs_with_special_tokens( - token_ids_0=[-1], token_ids_1=[-2] - ) - return dummy_ids[dummy_ids.index(-1) + 1 : dummy_ids.index(-2)] - - @property - def document_type(self) -> Optional[Type[DocumentType]]: - if self.partition_annotation is not None: - dt = TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions - else: - dt = TextDocumentWithLabeledSpansAndBinaryRelations - if self.relation_annotation == "binary_relations": - return dt - else: - logger.warning( - 
f"relation_annotation={self.relation_annotation} is " - f"not the default value ('binary_relations'), so the taskmodule {type(self).__name__} can not request " - f"the usual document type for auto-conversion ({dt.__name__}) because this has the bespoken default " - f"value as layer name instead of the provided one." - ) - return None - - def get_relation_layer(self, document: Document) -> AnnotationLayer[BinaryRelation]: - return document[self.relation_annotation] - - def get_entity_layer(self, document: Document) -> AnnotationLayer[LabeledSpan]: - relations: AnnotationLayer[BinaryRelation] = self.get_relation_layer(document) - return relations.target_layer - - def get_marker_factory(self) -> MarkerFactory: - return MarkerFactory(role_to_marker=self.argument_role_to_marker) - - def _prepare(self, documents: Sequence[DocumentType]) -> None: - entity_labels: Set[str] = set() - relation_labels: Set[str] = set() - for document in documents: - relations: AnnotationLayer[BinaryRelation] = self.get_relation_layer(document) - entities: AnnotationLayer[LabeledSpan] = self.get_entity_layer(document) - - for entity in entities: - entity_labels.add(entity.label) - - for relation in relations: - relation_labels.add(relation.label) - if self.add_reversed_relations: - if relation.label.endswith(self.reversed_relation_label_suffix): - raise ValueError( - f"doc.id={document.id}: the relation label '{relation.label}' already ends with " - f"the reversed_relation_label_suffix '{self.reversed_relation_label_suffix}', " - f"this is not allowed because we would not know if we should strip the suffix and " - f"revert the arguments during inference or not" - ) - if relation.label not in self.symmetric_relations: - relation_labels.add(relation.label + self.reversed_relation_label_suffix) - - if self.none_label in relation_labels: - relation_labels.remove(self.none_label) - - self.labels = sorted(relation_labels) - self.entity_labels = sorted(entity_labels) - - def encode(self, *args, 
**kwargs): - self.reset_statistics() - res = super().encode(*args, **kwargs) - self.show_statistics() - return res - - def _post_prepare(self): - self.label_to_id = {label: i + 1 for i, label in enumerate(self.labels)} - self.label_to_id[self.none_label] = 0 - self.id_to_label = {v: k for k, v in self.label_to_id.items()} - - self.marker_factory = self.get_marker_factory() - self.argument_markers = self.marker_factory.get_all_markers( - append_markers=self.append_markers, - add_type_to_marker=self.add_type_to_marker, - entity_labels=self.entity_labels, - ) - self.tokenizer.add_tokens(self.argument_markers, special_tokens=True) - - self.argument_markers_to_id = { - marker: self.tokenizer.vocab[marker] for marker in self.argument_markers - } - - self.argument_role2idx = { - role: i for i, role in enumerate(sorted(self.marker_factory.all_roles)) - } - - def _add_reversed_relations( - self, - arguments2relation: Dict[Tuple[Tuple[str, Annotation], ...], Annotation], - doc_id: Optional[str] = None, - ) -> None: - if self.add_reversed_relations: - for arguments, rel in list(arguments2relation.items()): - arg_roles, arg_spans = zip(*arguments) - if isinstance(rel, BinaryRelation): - label = rel.label - if label in self.symmetric_relations and not self.reverse_symmetric_relations: - continue - if label.endswith(self.reversed_relation_label_suffix): - raise ValueError( - f"doc.id={doc_id}: The relation has the label '{label}' which already ends with the " - f"reversed_relation_label_suffix='{self.reversed_relation_label_suffix}'. " - f"It looks like the relation is already reversed, which is not allowed." 
- ) - if rel.label not in self.symmetric_relations: - label += self.reversed_relation_label_suffix - - reversed_rel = BinaryRelation( - head=rel.tail, - tail=rel.head, - label=label, - score=rel.score, - ) - reversed_arguments = get_relation_argument_spans_and_roles(reversed_rel) - if reversed_arguments in arguments2relation: - prev_rel = arguments2relation[reversed_arguments] - prev_label = prev_rel.label - logger.warning( - f"doc.id={doc_id}: there is already a relation with reversed " - f"arguments={reversed_arguments} and label={prev_label}, so we do not add the reversed " - f"relation (with label {prev_label}) for these arguments" - ) - if self.collect_statistics: - self.collect_relation("skipped_reversed_same_arguments", reversed_rel) - continue - elif rel.label in self.symmetric_relations: - # warn if the original relation arguments were not sorted by their start and end positions - # in the case of symmetric relations - if not all(isinstance(arg_span, Span) for arg_span in arg_spans): - raise NotImplementedError( - f"doc.id={doc_id}: the taskmodule does not yet support adding reversed relations " - f"for symmetric relations with arguments that are no Spans: {arguments}" - ) - args_sorted = sorted( - [rel.head, rel.tail], key=lambda span: (span.start, span.end) - ) - if args_sorted != [rel.head, rel.tail]: - logger.warning( - f"doc.id={doc_id}: The symmetric relation with label '{label}' has arguments " - f"{arguments} which are not sorted by their start and end positions. " - f"This may lead to problems during evaluation because we assume that the " - f"arguments of symmetric relations were sorted in the beginning and, thus, interpret " - f"relations where this is not the case as reversed. All reversed relations will get " - f"their arguments swapped during inference in the case of add_reversed_relations=True " - f"to remove duplicates. 
You may consider adding reversed versions of the *symmetric* " - f"relations on your own and then setting *reverse_symmetric_relations* to False." - ) - if self.collect_statistics: - self.collect_relation( - "used_not_sorted_reversed_arguments", reversed_rel - ) - - arguments2relation[reversed_arguments] = reversed_rel - else: - raise NotImplementedError( - f"doc.id={doc_id}: the taskmodule does not yet support adding reversed relations for type: " - f"{type(rel)}" - ) - - def _filter_relations_by_argument_and_relation_type_whitelist( - self, - arguments2relation: Dict[Tuple[Tuple[str, Annotation], ...], Annotation], - doc_id: Optional[str] = None, - ) -> None: - if self.argument_and_relation_type_whitelist is not None: - for arguments, relation in list(arguments2relation.items()): - argument_labels = tuple(getattr(ann, "label") for role, ann in arguments) - relation_label = getattr(relation, "label") - if ( - relation_label not in self.argument_and_relation_type_whitelist - or argument_labels - not in self.argument_and_relation_type_whitelist[relation_label] - ): - rel = arguments2relation.pop(arguments) - self.collect_relation("skipped_argument_and_relation_type_whitelist", rel) - - def _filter_relations_by_argument_type_whitelist( - self, - arguments2relation: Dict[Tuple[Tuple[str, Annotation], ...], Annotation], - doc_id: Optional[str] = None, - ) -> None: - if self.argument_type_whitelist is not None: - for arguments, rel in list(arguments2relation.items()): - argument_labels = tuple(getattr(arg, "label") for _, arg in arguments) - if argument_labels not in self.argument_type_whitelist: - rel = arguments2relation.pop(arguments) - self.collect_relation("skipped_argument_type_whitelist", rel) - - def _add_candidate_relations( - self, - arguments2relation: Dict[Tuple[Tuple[str, Annotation], ...], Annotation], - entities: Iterable[Span], - arguments_blacklist: Optional[Set[Tuple[Tuple[str, Annotation], ...]]] = None, - doc_id: Optional[str] = None, - ) -> None: - 
if self.add_candidate_relations: - if self.marker_factory.all_roles == {HEAD, TAIL}: - # flatten argument_and_relation_type_whitelist values - arg_rel_whitelist_vals_set = ( - None - if self.argument_and_relation_type_whitelist is None - else {i for j in self.argument_and_relation_type_whitelist.values() for i in j} - ) - # iterate over all possible argument candidates - for head in entities: - for tail in entities: - if head == tail: - continue - - # Create a relation candidate with the none label. Otherwise, we use the existing relation. - new_relation = BinaryRelation( - head=head, tail=tail, label=self.none_label, score=1.0 - ) - new_relation_args = get_relation_argument_spans_and_roles(new_relation) - arg_roles, arg_spans = zip(*new_relation_args) - arg_labels = tuple(getattr(ann, "label") for ann in arg_spans) - - # Skip if argument_type_whitelist and/or argument_and_relation_type_whitelist - # are defined and current candidates do not fit. - if ( - self.argument_type_whitelist is not None - and arg_labels not in self.argument_type_whitelist - ) or ( - arg_rel_whitelist_vals_set is not None - and arg_labels not in arg_rel_whitelist_vals_set - ): - continue - - # check blacklist - if ( - arguments_blacklist is not None - and new_relation_args in arguments_blacklist - ): - continue - - # we use the new relation only if there is no existing relation with the same arguments - if new_relation_args not in arguments2relation: - arguments2relation[new_relation_args] = new_relation - else: - raise NotImplementedError( - f"doc.id={doc_id}: the taskmodule does not yet support adding relation candidates " - f"with argument roles other than 'head' and 'tail': {sorted(self.marker_factory.all_roles)}" - ) - - def _filter_relations_by_argument_distance( - self, - arguments2relation: Dict[Tuple[Tuple[str, Annotation], ...], Annotation], - doc_id: Optional[str] = None, - ) -> None: - if self.max_argument_distance is not None: - for arguments, rel in 
list(arguments2relation.items()): - if isinstance(rel, BinaryRelation): - if isinstance(rel.head, Span) and isinstance(rel.tail, Span): - dist = span_distance( - (rel.head.start, rel.head.end), - (rel.tail.start, rel.tail.end), - self.max_argument_distance_type, - ) - if dist > self.max_argument_distance: - arguments2relation.pop(arguments) - self.collect_relation("skipped_argument_distance", rel) - else: - raise NotImplementedError( - f"doc.id={doc_id}: the taskmodule does not yet support filtering relation candidates " - f"with arguments of type: {type(rel.head)} and {type(rel.tail)}" - ) - else: - raise NotImplementedError( - f"doc.id={doc_id}: the taskmodule does not yet support filtering relation candidates for " - f"type: {type(rel)}" - ) - - def encode_input( - self, - document: DocumentType, - is_training: bool = False, - ) -> Optional[Union[TaskEncodingType, Sequence[TaskEncodingType]]]: - all_relations: Sequence[Annotation] = self.get_relation_layer(document) - all_entities: Sequence[Span] = self.get_entity_layer(document) - self.collect_all_relations("available", all_relations) - - partitions: Sequence[Span] - if self.partition_annotation is not None: - partitions = document[self.partition_annotation] - if len(partitions) == 0: - logger.warning( - f"the document {document.id} has no '{self.partition_annotation}' partition entries, " - f"no inputs will be created!" - ) - else: - # use single dummy partition - partitions = [Span(start=0, end=len(document.text))] - - task_encodings: List[TaskEncodingType] = [] - for partition in partitions: - # get all entities that are contained in the current partition - entities: List[Span] = [ - entity - for entity in all_entities - if is_contained_in((entity.start, entity.end), (partition.start, partition.end)) - ] - - # Create a mapping from relation arguments to the respective relation objects. - # Note that the data can contain multiple relations with the same arguments. 
- entities_set = set(entities) - arguments2relations: Dict[Tuple[Tuple[str, Annotation], ...], List[Annotation]] = ( - defaultdict(list) - ) - for rel in all_relations: - # Skip relations with unknown labels. Use label_to_id because that contains the none_label - if rel.label not in self.label_to_id: - self.collect_relation("skipped_unknown_label", rel) - continue - - arguments = get_relation_argument_spans_and_roles(rel) - arg_roles, arg_spans = zip(*arguments) - - # filter out all relations that are completely outside the current partition - if all(arg_span not in entities_set for arg_span in arg_spans): - continue - - # filter relations that are only partially contained in the current partition, - # i.e. some arguments are in the partition and some are not - if any(arg_span not in entities_set for arg_span in arg_spans): - logger.warning( - f"doc.id={document.id}: there is a relation with label '{rel.label}' and arguments " - f"{arguments} that is only partially contained in the current partition. " - f"We skip this relation." 
- ) - self.collect_relation("skipped_partially_contained", rel) - continue - arguments2relations[arguments].append(rel) - - # resolve duplicates for same arguments - arguments2relation: Dict[Tuple[Tuple[str, Annotation], ...], Annotation] = {} - # we will never create an encoding for the relation candidates in arguments_blacklist - arguments_blacklist: Set[Tuple[Tuple[str, Annotation], ...]] = set() - for arguments, relations in arguments2relations.items(): - relations_set = set(relations) - # more than one unique relation with the same arguments - if len(relations_set) > 1: - arguments_resolved = tuple(map(lambda x: (x[0], x[1].resolve()), arguments)) - labels = [rel.label for rel in relations] - if self.handle_relations_with_same_arguments == "keep_first": - # keep only the first relation - arguments2relation[arguments] = relations[0] - for discard_rel in set(relations) - { - relations[0] - }: # remove all other relations - self.collect_relation("skipped_same_arguments", discard_rel) - if not self.collect_statistics: - # We show this warning only if statistics are disabled. - # We want to be informed if such skip occurs, but having it in statistics and - # getting lots of warnings in the same time seemed overwhelming. - logger.warning( - f"doc.id={document.id}: there are multiple relations with the same arguments " - f"{arguments_resolved}, but different labels: {labels}. We only keep the first " - f"occurring relation which has the label='{relations[0].label}'." 
- ) - elif self.handle_relations_with_same_arguments == "keep_none": - # add these arguments to the blacklist to not add them as 'no-relation's back again - arguments_blacklist.add(arguments) - # remove all relations with the same arguments - for discard_rel in relations_set: - self.collect_relation("skipped_same_arguments", discard_rel) - if not self.collect_statistics: - logger.warning( - f"doc.id={document.id}: there are multiple relations with the same arguments " - f"{arguments_resolved}, but different labels: {labels}. All relations will be removed." - ) - else: - raise ValueError( - f"'handle_relations_with_same_arguments' must be 'keep_first' or 'keep_none', " - f"but got `{self.handle_relations_with_same_arguments}`." - ) - else: - arguments2relation[arguments] = relations[0] - # more than one duplicate relation (with the same arguments) - if len(relations) > 1: - # if 'collect_statistics=true' such duplicates won't be collected and are not counted in - # statistics if 'collect_statistics=true' either as 'available' or as 'skipped_same_arguments' - logger.warning( - f"doc.id={document.id}: Relation annotation `{rel.resolve()}` is duplicated. " - f"We keep only one of them. Duplicate won't appear in statistics either as 'available' " - f"or as skipped." 
- ) - - # We use this filter before adding reversed relations because we also don't want them to be reversed - self._filter_relations_by_argument_and_relation_type_whitelist( - arguments2relation=arguments2relation, doc_id=document.id - ) - self._add_reversed_relations(arguments2relation=arguments2relation, doc_id=document.id) - self._filter_relations_by_argument_type_whitelist( - arguments2relation=arguments2relation, doc_id=document.id - ) - self._add_candidate_relations( - arguments2relation=arguments2relation, - arguments_blacklist=arguments_blacklist, - entities=entities, - doc_id=document.id, - ) - - self._filter_relations_by_argument_distance( - arguments2relation=arguments2relation, doc_id=document.id - ) - - without_special_tokens = self.max_window is not None - text = document.text[partition.start : partition.end] - encoding = self.tokenizer( - text, - padding=False, - truncation=self.truncation if self.max_window is None else False, - max_length=self.max_length, - is_split_into_words=False, - return_offsets_mapping=False, - add_special_tokens=not without_special_tokens, - ) - - for arguments, rel in arguments2relation.items(): - arg_roles, arg_spans = zip(*arguments) - if not all(isinstance(arg, LabeledSpan) for arg in arg_spans): - # TODO: add test case for this - raise ValueError( - f"the taskmodule expects the relation arguments to be of type LabeledSpan, " - f"but got {[type(arg) for arg in arg_spans]}" - ) - - arg_spans_partition = [ - shift_span(span, offset=-partition.start) for span in arg_spans - ] - # map character spans to token spans - try: - arg_token_spans = [ - get_aligned_token_span( - encoding=encoding, - char_span=arg, - ) - for arg in arg_spans_partition - ] - # Check if the mapping was successful. It may fail (and is None) if any argument start or end does not - # match a token start or end, respectively. 
- except SpanNotAlignedWithTokenException as e: - span_original = shift_span(e.span, offset=partition.start) - # the span is not attached because we shifted it above, so we can not use str(e.span) - span_text = document.text[span_original.start : span_original.end] - logger.warning( - f"doc.id={document.id}: Skipping invalid example, cannot get argument token slice for " - f'{span_original}: "{span_text}"' - ) - self.collect_relation("skipped_args_not_aligned", rel) - continue - - # create the argument objects - args = [ - RelationArgument( - entity=span, - role=role, - token_span=token_span, - add_type_to_marker=self.add_type_to_marker, - marker_factory=self.marker_factory, - ) - for span, role, token_span in zip(arg_spans, arg_roles, arg_token_spans) - ] - - if self.max_argument_distance_tokens is not None: - token_distances = [] - for idx1 in range(len(args) - 1): - for idx in range(idx1 + 1, len(args)): - arg1 = args[idx1] - arg2 = args[idx] - dist = span_distance( - (arg1.token_span.start, arg1.token_span.end), - (arg2.token_span.start, arg2.token_span.end), - self.max_argument_distance_type_tokens, - ) - token_distances.append(dist) - if len(token_distances) > 0: - if self.max_argument_distance_type_tokens == "outer": - max_dist = max(token_distances) - elif self.max_argument_distance_type_tokens == "inner": - if len(args) > 2: - raise NotImplementedError( - f"max_argument_distance_type_tokens={self.max_argument_distance_type_tokens} " - f"is not supported for relations with more than 2 arguments" - ) - max_dist = max(token_distances) - else: - raise NotImplementedError( - f"max_argument_distance_type_tokens={self.max_argument_distance_type_tokens} " - f"is not supported" - ) - if max_dist > self.max_argument_distance_tokens: - self.collect_relation("skipped_argument_distance_tokens", rel) - continue - - input_ids = encoding["input_ids"] - - entity_tags = None - if self.add_entity_tags_to_input: - entity_spans_partition = [ - shift_span(span, 
offset=-partition.start) for span in entities - ] - entity_token_spans = [] - for span in entity_spans_partition: - try: - entity_token_spans.append( - get_aligned_token_span( - encoding=encoding, - char_span=span, - ) - ) - except SpanNotAlignedWithTokenException as e: - span_original = shift_span(e.span, offset=partition.start) - span_text = document.text[span_original.start : span_original.end] - logger.warning( - f"doc.id={document.id}: Skipping invalid example, cannot get entity token slice for " - f'{span_original}: "{span_text}"' - ) - self.collect_relation("skipped_entity_not_aligned", rel) - continue - - entity_tags = bio_encode_spans( - spans=[ - (span.start, span.end, getattr(span, "label", "ENTITY")) - for span in entity_token_spans - ], - total_length=len(input_ids), - label2idx={ - label: idx for idx, label in enumerate(self.entity_labels or []) - }, - ) - - # windowing: we restrict the input to a window of a maximal size (max_window) with the arguments - # of the candidate relation in the center (as much as possible) - if self.max_window is not None: - # The actual number of tokens needs to be lower than max_window because we add two - # marker tokens (before / after) each argument and the default special tokens - # (e.g. CLS and SEP). 
- max_tokens = self.max_window - self.tokenizer.num_special_tokens_to_add() - if self.insert_markers: - max_tokens -= len(args) * 2 - # if we add the markers also to the end, this decreases the available window again by - # two tokens (marker + sep) per argument - if self.append_markers: - # TODO: add test case for this - max_tokens -= len(args) * 2 - - if self.allow_discontinuous_text: - if entity_tags is not None: - raise NotImplementedError( - "allow_discontinuous_text=True is not yet supported with add_entity_tags_to_input=True" - ) - - max_tokens_per_argument = max_tokens // len(args) - max_tokens_per_argument -= len(self.glue_token_ids) - if any( - arg.token_span.end - arg.token_span.start > max_tokens_per_argument - for arg in args - ): - self.collect_relation("skipped_too_long_argument", rel) - continue - - mask = np.zeros_like(input_ids) - for arg in args: - # if the input is already fully covered by one argument frame, we keep everything - if len(input_ids) <= max_tokens_per_argument: - mask[:] = 1 - break - arg_center = (arg.token_span.end + arg.token_span.start) // 2 - arg_frame_start = arg_center - max_tokens_per_argument // 2 - # shift the frame to the right if it is out of bounds - if arg_frame_start < 0: - arg_frame_start = 0 - arg_frame_end = arg_frame_start + max_tokens_per_argument - # shift the frame to the left if it is out of bounds - # Note that this can not cause to have arg_frame_start < 0 because we already - # checked that the frame is not larger than the input. 
- if arg_frame_end > len(input_ids): - arg_frame_end = len(input_ids) - arg_frame_start = arg_frame_end - max_tokens_per_argument - # still, a sanity check - if arg_frame_start < 0: - raise ValueError( - f"arg_frame_start={arg_frame_start} < 0 after adjusting arg_frame_end={arg_frame_end}" - ) - mask[arg_frame_start:arg_frame_end] = 1 - offsets = np.cumsum(mask != 1) - arg_cluster_offset_values = set() - # sort by start indices - args_sorted = sorted(args, key=lambda x: x.token_span.start) - for arg in args_sorted: - offset = offsets[arg.token_span.start] - arg_cluster_offset_values.add(offset) - arg.shift_token_span(-offset) - # shift back according to inserted glue patterns - num_glues = len(arg_cluster_offset_values) - 1 - arg.shift_token_span(num_glues * len(self.glue_token_ids)) - - new_input_ids: List[int] = [] - for arg_cluster_offset_value in sorted(arg_cluster_offset_values): - if len(new_input_ids) > 0: - new_input_ids.extend(self.glue_token_ids) - segment_mask = offsets == arg_cluster_offset_value - segment_input_ids = [ - input_id - for input_id, keep in zip(input_ids, mask & segment_mask) - if keep - ] - new_input_ids.extend(segment_input_ids) - - input_ids = new_input_ids - else: - # the slice from the beginning of the first entity to the end of the second is required - slice_required = ( - min(arg.token_span.start for arg in args), - max(arg.token_span.end for arg in args), - ) - window_slice = get_window_around_slice( - slice=slice_required, - max_window_size=max_tokens, - available_input_length=len(input_ids), - ) - # this happens if slice_required (all arguments) does not fit into max_tokens (the available window) - if window_slice is None: - self.collect_relation("skipped_too_long", rel) - continue - - window_start, window_end = window_slice - input_ids = input_ids[window_start:window_end] - - if entity_tags is not None: - entity_tags = entity_tags[window_start:window_end] - - for arg in args: - arg.shift_token_span(-window_start) - - # collect 
all markers with their target positions, the source argument, and - marker_ids_with_positions = [] - for arg in args: - marker_ids_with_positions.append( - ( - self.argument_markers_to_id[arg.as_start_marker], - arg.token_span.start, - arg, - START, - ) - ) - marker_ids_with_positions.append( - ( - self.argument_markers_to_id[arg.as_end_marker], - arg.token_span.end, - arg, - END, - ) - ) - - # create new input ids with the markers inserted and collect new mention offsets - input_ids_with_markers = list(input_ids) - offset = 0 - arg_start_indices = [-1] * len(self.argument_role2idx) - arg_end_indices = [-1] * len(self.argument_role2idx) - marker_ids_with_positions_sorted = sorted( - marker_ids_with_positions, key=lambda id_pos: id_pos[1] - ) - for ( - marker_id, - token_position, - arg, - marker_type, - ) in marker_ids_with_positions_sorted: - if self.insert_markers: - input_ids_with_markers = ( - input_ids_with_markers[: token_position + offset] - + [marker_id] - + input_ids_with_markers[token_position + offset :] - ) - if entity_tags is not None: - entity_tags = ( - entity_tags[: token_position + offset] - + [0] - + entity_tags[token_position + offset :] - ) - offset += 1 - if self.add_argument_indices_to_input or self.add_argument_tags_to_input: - idx = self.argument_role2idx[arg.role] - if marker_type == START: - if arg_start_indices[idx] != -1: - # TODO: add test case for this - raise ValueError( - f"Trying to overwrite arg_start_indices[{idx}]={arg_start_indices[idx]} with " - f"{token_position + offset} for document {document.id}" - ) - arg_start_indices[idx] = token_position + offset - elif marker_type == END: - if arg_end_indices[idx] != -1: - # TODO: add test case for this - raise ValueError( - f"Trying to overwrite arg_start_indices[{idx}]={arg_end_indices[idx]} with " - f"{token_position + offset} for document {document.id}" - ) - # -1 to undo the additional offset for the end marker which does not - # affect the mention offset - arg_end_indices[idx] = 
( - token_position + offset - (1 if self.insert_markers else 0) - ) - - if self.append_markers: - if self.tokenizer.sep_token is None: - # TODO: add test case for this - raise ValueError("append_markers is True, but tokenizer has no sep_token") - sep_token_id = self.tokenizer.vocab[self.tokenizer.sep_token] - for arg in args: - if without_special_tokens: - # TODO: add test case for this - input_ids_with_markers.append(sep_token_id) - input_ids_with_markers.append( - self.argument_markers_to_id[arg.as_append_marker] - ) - else: - input_ids_with_markers.append( - self.argument_markers_to_id[arg.as_append_marker] - ) - input_ids_with_markers.append(sep_token_id) - if entity_tags is not None: - entity_tags.append(0) - entity_tags.append(0) - - # when windowing is used, we have to add the special tokens manually - if without_special_tokens: - original_input_ids_with_markers = input_ids_with_markers - input_ids_with_markers = self.tokenizer.build_inputs_with_special_tokens( - token_ids_0=input_ids_with_markers - ) - if self.add_argument_indices_to_input or self.add_argument_tags_to_input: - # get the number of prefix tokens - index_offset = find_sublist( - sub=original_input_ids_with_markers, bigger=input_ids_with_markers - ) - if index_offset == -1: - raise ValueError( - f"Could not find the original tokens in the prefixed tokens for document {document.id}" - ) - arg_start_indices = [ - idx + index_offset if idx != -1 else -1 for idx in arg_start_indices - ] - arg_end_indices = [ - idx + index_offset if idx != -1 else -1 for idx in arg_end_indices - ] - if entity_tags is not None: - special_tokens_mask = self.tokenizer.get_special_tokens_mask( - token_ids_0=input_ids_with_markers, already_has_special_tokens=True - ) - entity_tags_with_special = self.tokenizer.build_inputs_with_special_tokens( - token_ids_0=entity_tags - ) - entity_tags = [ - tag if not is_special else 0 - for tag, is_special in zip( - entity_tags_with_special, special_tokens_mask - ) - ] - - inputs = 
{"input_ids": input_ids_with_markers} - if self.add_argument_indices_to_input: - inputs["pooler_start_indices"] = arg_start_indices - inputs["pooler_end_indices"] = arg_end_indices - if self.add_argument_tags_to_input: - # create bio-encoded tags for the arguments - # using arg_start_indices, arg_end_indices, and marker_ids_with_positions_sorted - argument_spans = [ - ( - arg_start_indices[self.argument_role2idx[arg.role]], - arg_end_indices[self.argument_role2idx[arg.role]], - arg.role, - ) - for marker_id, token_position, arg, marker_type in marker_ids_with_positions_sorted - ] - argument_tag_ids = bio_encode_spans( - spans=argument_spans, - total_length=len(input_ids_with_markers), - label2idx=self.argument_role2idx, - ) - inputs["argument_tags"] = argument_tag_ids - - if entity_tags is not None: - inputs["entity_tags"] = entity_tags - - task_encodings.append( - TaskEncoding( - document=document, - inputs=inputs, - metadata=({"candidate_annotation": rel}), - ) - ) - - self.collect_relation("used", rel) - - return task_encodings - - def _maybe_log_example( - self, - task_encoding: TaskEncodingType, - target: TargetEncodingType, - ): - """Maybe log the example.""" - - # log the first n examples - if self._logged_examples_counter < self.log_first_n_examples: - input_ids = task_encoding.inputs["input_ids"] - tokens = self.tokenizer.convert_ids_to_tokens(input_ids) - target_labels = [self.id_to_label[label_id] for label_id in target] - logger.info("*** Example ***") - logger.info("doc id: %s", task_encoding.document.id) - logger.info("tokens: %s", " ".join([str(x) for x in tokens])) - logger.info("input_ids: %s", " ".join([str(x) for x in input_ids])) - logger.info("Expected label: %s (ids = %s)", target_labels, target) - - self._logged_examples_counter += 1 - - def encode_target( - self, - task_encoding: TaskEncodingType, - ) -> TargetEncodingType: - candidate_annotation = task_encoding.metadata["candidate_annotation"] - if isinstance(candidate_annotation, 
(BinaryRelation, NaryRelation)): - labels = [candidate_annotation.label] - else: - raise NotImplementedError( - f"encoding the target with a candidate_annotation of another type than BinaryRelation or" - f"NaryRelation is not yet supported. candidate_annotation has the type: " - f"{type(candidate_annotation)}" - ) - target = [self.label_to_id[label] for label in labels] - - self._maybe_log_example(task_encoding=task_encoding, target=target) - - return target - - def unbatch_output(self, model_output: ModelTargetType) -> Sequence[TaskOutputType]: - unbatched_output = [] - if self.multi_label: - raise NotImplementedError - else: - label_ids = model_output["labels"].detach().cpu().tolist() - probabilities = model_output["probabilities"].detach().cpu().tolist() - for batch_idx in range(len(label_ids)): - label_id = label_ids[batch_idx] - result: TaskOutputType = { - "labels": [self.id_to_label[label_id]], - "probabilities": [probabilities[batch_idx][label_id]], - } - unbatched_output.append(result) - - return unbatched_output - - def create_annotations_from_output( - self, - task_encoding: TaskEncodingType, - task_output: TaskOutputType, - ) -> Iterator[Tuple[str, Union[BinaryRelation, MultiLabeledBinaryRelation, NaryRelation]]]: - candidate_annotation = task_encoding.metadata["candidate_annotation"] - new_annotation: Union[BinaryRelation, MultiLabeledBinaryRelation, NaryRelation] - if self.multi_label: - raise NotImplementedError - else: - label = task_output["labels"][0] - probability = ( - task_output["probabilities"][0] if "probabilities" in task_output else 1.0 - ) - if isinstance(candidate_annotation, BinaryRelation): - head = candidate_annotation.head - tail = candidate_annotation.tail - # Reverse predicted reversed relations back. Serialization will remove any duplicated relations. 
- if self.add_reversed_relations: - # TODO: add test case for this - if label.endswith(self.reversed_relation_label_suffix): - label = label[: -len(self.reversed_relation_label_suffix)] - head, tail = tail, head - # If the predicted label is symmetric, we sort the arguments by its center. - elif label in self.symmetric_relations and self.reverse_symmetric_relations: - if not (isinstance(head, Span) and isinstance(tail, Span)): - raise ValueError( - f"the taskmodule expects the relation arguments of the candidate_annotation" - f"to be of type Span, but got head of type: {type(head)} and tail of type: " - f"{type(tail)}" - ) - # use a unique order for the arguments: sort by start and end positions - head, tail = sorted([head, tail], key=lambda span: (span.start, span.end)) - new_annotation = BinaryRelation( - head=head, tail=tail, label=label, score=probability - ) - elif isinstance(candidate_annotation, NaryRelation): - # TODO: add test case for this - if self.add_reversed_relations: - raise ValueError("can not reverse a NaryRelation") - new_annotation = NaryRelation( - arguments=candidate_annotation.arguments, - roles=candidate_annotation.roles, - label=label, - score=probability, - ) - else: - raise NotImplementedError( - f"creating a new annotation from a candidate_annotation of another type than BinaryRelation is " - f"not yet supported. candidate_annotation has the type: {type(candidate_annotation)}" - ) - - new_annotation_args = get_relation_argument_spans_and_roles(new_annotation) - arg_roles, arg_spans = zip(*new_annotation_args) - arg_labels = tuple(getattr(ann, "label") for ann in arg_spans) - - # Create annotation only if 1. and 2. are fulfilled: - if ( - # 1. the label is not the no-relation-label, - label != self.none_label - # or we did not create candidate relations, - or not self.add_candidate_relations - ) and ( - # 2. 
the argument_and_relation_type_whitelist is not set, - self.argument_and_relation_type_whitelist is None - # or the label and argument types are in the whitelist - or arg_labels in self.argument_and_relation_type_whitelist.get(label, {}) - ): - yield self.relation_annotation, new_annotation - - def _get_global_attention(self, input_ids: torch.LongTensor) -> torch.LongTensor: - # we want to have global attention on all marker tokens and the cls token - positive_token_ids = list(self.argument_markers_to_id.values()) + [ - self.tokenizer.cls_token_id - ] - global_attention_mask = construct_mask( - input_ids=input_ids, positive_ids=positive_token_ids - ) - return global_attention_mask - - def collate( - self, task_encodings: Sequence[TaskEncodingType] - ) -> Tuple[ModelInputType, Optional[ModelTargetType]]: - input_features = [ - {"input_ids": task_encoding.inputs["input_ids"]} for task_encoding in task_encodings - ] - - inputs: Dict[str, torch.LongTensor] = self.tokenizer.pad( - input_features, - padding=self.padding, - max_length=self.max_length, - pad_to_multiple_of=self.pad_to_multiple_of, - return_tensors="pt", - ) - if self.add_argument_tags_to_input: - argument_tags = [ - {"input_ids": task_encoding.inputs["argument_tags"]} - for task_encoding in task_encodings - ] - argument_tags_padded = self.tokenizer.pad( - argument_tags, - padding=self.padding, - max_length=self.max_length, - pad_to_multiple_of=self.pad_to_multiple_of, - return_tensors="pt", - ) - # increase all values by 1 because 0 is used for padding - inputs["argument_tags"] = argument_tags_padded["input_ids"] + 1 - # overwrite padding with 0 - inputs["argument_tags"][argument_tags_padded["attention_mask"] == 0] = 0 - - if self.add_entity_tags_to_input: - entity_tags = [ - {"input_ids": task_encoding.inputs["entity_tags"]} - for task_encoding in task_encodings - ] - entity_tags_padded = self.tokenizer.pad( - entity_tags, - padding=self.padding, - max_length=self.max_length, - 
pad_to_multiple_of=self.pad_to_multiple_of, - return_tensors="pt", - ) - # increase all values by 1 because 0 is used for padding - inputs["entity_tags"] = entity_tags_padded["input_ids"] + 1 - # overwrite padding with 0 - inputs["entity_tags"][entity_tags_padded["attention_mask"] == 0] = 0 - - if self.add_argument_indices_to_input: - inputs["pooler_start_indices"] = torch.tensor( - [task_encoding.inputs["pooler_start_indices"] for task_encoding in task_encodings] - ).to(torch.long) - inputs["pooler_end_indices"] = torch.tensor( - [task_encoding.inputs["pooler_end_indices"] for task_encoding in task_encodings] - ).to(torch.long) - - if self.add_global_attention_mask_to_input: - inputs["global_attention_mask"] = self._get_global_attention( - input_ids=inputs["input_ids"] - ) - - if not task_encodings[0].has_targets: - return inputs, None - - target_list: List[TargetEncodingType] = [ - task_encoding.targets for task_encoding in task_encodings - ] - targets = torch.tensor(target_list, dtype=torch.int64) - - if not self.multi_label: - targets = targets.flatten() - - return inputs, {"labels": targets} - - def configure_model_metric(self, stage: str) -> MetricCollection: - if self.label_to_id is None: - raise ValueError( - "The taskmodule has not been prepared yet, so label_to_id is not known. " - "Please call taskmodule.prepare(documents) before configuring the model metric " - "or pass the labels to the taskmodule constructor an call taskmodule.post_prepare()." 
- ) - # we use the length of label_to_id because that contains the none_label (in contrast to labels) - labels = [self.id_to_label[i] for i in range(len(self.label_to_id))] - common_metric_kwargs = { - "num_classes": len(labels), - "task": "multilabel" if self.multi_label else "multiclass", - } - return MetricCollection( - { - "with_tn": WrappedMetricWithPrepareFunction( - metric=MetricCollection( - { - "micro/f1": F1Score(average="micro", **common_metric_kwargs), - "macro/f1": F1Score(average="macro", **common_metric_kwargs), - "f1_per_label": ClasswiseWrapper( - F1Score(average=None, **common_metric_kwargs), - labels=labels, - postfix="/f1", - ), - } - ), - prepare_function=_get_labels, - ), - # We can not easily calculate the macro f1 here, because - # F1Score with average="macro" would still include the none_label. - "micro/f1_without_tn": WrappedMetricWithPrepareFunction( - metric=F1Score(average="micro", **common_metric_kwargs), - prepare_together_function=partial( - _get_labels_together_remove_none_label, - none_idx=self.label_to_id[self.none_label], - ), - ), - } - ) diff --git a/src/pie_modules/taskmodules/text_to_text.py b/src/pie_modules/taskmodules/text_to_text.py deleted file mode 100644 index 1a90a7854..000000000 --- a/src/pie_modules/taskmodules/text_to_text.py +++ /dev/null @@ -1,458 +0,0 @@ -import dataclasses -import logging -from functools import partial -from typing import ( - Any, - Dict, - Iterator, - List, - Optional, - Sequence, - Set, - Tuple, - Type, - Union, -) - -import torch -from pie_core import ( - Annotation, - AnnotationLayer, - Document, - TaskEncoding, - TaskModule, -) -from pie_core.taskmodule import ( - InputEncoding, - ModelBatchOutput, - TargetEncoding, - TaskBatchEncoding, -) -from pie_core.utils.hydra import resolve_type -from torchmetrics import Metric -from transformers import AutoTokenizer, PreTrainedTokenizer -from typing_extensions import TypeAlias - -from pie_modules.annotations import AnnotationWithText -from 
pie_modules.document.processing import ( - token_based_document_to_text_based, - tokenize_document, -) -from pie_modules.documents import TextBasedDocument, TokenBasedDocument - -from .common import BatchableMixin, get_first_occurrence_index -from .metrics import WrappedMetricWithPrepareFunction - -logger = logging.getLogger(__name__) - - -DocumentType: TypeAlias = TextBasedDocument - - -@dataclasses.dataclass -class InputEncodingType(BatchableMixin): - input_ids: List[int] - attention_mask: List[int] - - -@dataclasses.dataclass -class TargetEncodingType(BatchableMixin): - labels: List[int] - # this is optional because we use the same type for TaskOutputType, which does not have this field - decoder_attention_mask: Optional[List[int]] = None - - -TaskEncodingType: TypeAlias = TaskEncoding[ - DocumentType, - InputEncodingType, - TargetEncodingType, -] -TaskOutputType: TypeAlias = TargetEncodingType - - -# we use a custom un-batch function for metrics, because the text metrics such as ROUGEScore metric expects -# strings for input and target -def unbatch_and_untokenize( - batch: ModelBatchOutput, taskmodule: "TextToTextTaskModule" -) -> Sequence[str]: - unbatched = taskmodule.unbatch_output(batch) - texts = [ - taskmodule.tokenizer.decode(encoding.labels, skip_special_tokens=True) - for encoding in unbatched - ] - return texts - - -@TaskModule.register() -class TextToTextTaskModule( - TaskModule[ - DocumentType, - InputEncoding, - TargetEncoding, - TaskBatchEncoding, - ModelBatchOutput, - TaskOutputType, - ], -): - """A PIE task module for text-to-text tasks. It works with simple text annotations, e.g. - abstractive summaries, as target annotations. - - It can also be used with additional guidance annotations, e.g. questions for generative question answering, in - which case the text of the guidance annotation is prepended to the input text. - - Args: - tokenizer_name_or_path: The name (Huggingface Hub model identifier) or local path of the tokenizer to use. 
- document_type: The type of the input document. Must be a string that resolves to a subclass of - TextBasedDocument, e.g. "pie_modules.documents.TextDocumentWithAbstractiveSummary" for abstractive - summarization. - tokenized_document_type: The type of the tokenized document. Must be a string that resolves to a - subclass of TokenBasedDocument, e.g. "pie_modules.documents.TokenDocumentWithAbstractiveSummary" for - abstractive summarization. - target_layer: The name of the annotation layer that contains the target annotations, e.g. "abstractive_summary" - for abstractive summarization. - target_annotation_type: The type of the target annotations. Must be a string that resolves to a subclass - of AnnotationWithText, e.g. "pie_modules.annotations.AbstractiveSummary" for abstractive summarization. - guidance_layer: The name of the annotation layer that contains the guidance annotations. If set, the text of - the guidance annotation is prepended to the input text. - guidance_annotation_field: The name of the field in the target annotations that contains the guidance - annotation. Required if guidance_layer is defined to attach the guidance annotation to the newly created - target annotation. - text_metric_type: The type of the text metric to use for evaluation. Must be a string that resolves to a - subclass of Metric, e.g. "torchmetrics.text.ROUGEScore" for ROUGE score. - tokenizer_init_kwargs: Additional keyword arguments that are passed to the tokenizer constructor. - tokenizer_kwargs: Additional keyword arguments that are passed when calling the tokenizer. - partition_layer_name: The name of the annotation layer that contains the partitions. If set, the partitions - will be used to split the input text into multiple parts which are then tokenized separately. This can be - used to split long documents into multiple parts to avoid exceeding the maximum input length of the - tokenizer / model. 
- annotation_field_mapping: A mapping from input document annotation layer names to layer names defined in the - document_type / tokenized_document_type. This can be used if the actual input documents have different - annotation layer names than the provided document_type / tokenized_document_type. - log_first_n_examples: The number of examples to log. If set to a positive integer n, the first n examples will - be logged. This can be used to check if the input and target encodings are as expected. - """ - - def __init__( - self, - tokenizer_name_or_path: str, - document_type: str, - tokenized_document_type: str, - target_layer: str, - target_annotation_type: str, - guidance_layer: Optional[str] = None, - guidance_annotation_field: Optional[str] = None, - text_metric_type: Optional[str] = None, - tokenizer_init_kwargs: Optional[Dict[str, Any]] = None, - tokenizer_kwargs: Optional[Dict[str, Any]] = None, - partition_layer_name: Optional[str] = None, - annotation_field_mapping: Optional[Dict[str, str]] = None, - log_first_n_examples: Optional[int] = None, - **kwargs, - ): - super().__init__(**kwargs) - self.save_hyperparameters() - - self.target_layer = target_layer - self.guidance_layer = guidance_layer - self.target_annotation_type: Type[AnnotationWithText] = resolve_type( - target_annotation_type, expected_super_type=AnnotationWithText - ) - self.guidance_annotation_field = guidance_annotation_field - self.text_metric_type: Optional[Metric] = None - if text_metric_type is not None: - self.text_metric_type = resolve_type(text_metric_type, expected_super_type=Metric) - - # tokenization - self._document_type: Type[TextBasedDocument] = resolve_type( - document_type, expected_super_type=TextBasedDocument - ) - self._tokenized_document_type: Type[TokenBasedDocument] = resolve_type( - tokenized_document_type, expected_super_type=TokenBasedDocument - ) - self.tokenizer_name_or_path = tokenizer_name_or_path - self.tokenizer_kwargs = tokenizer_kwargs or {} - self.tokenizer: 
PreTrainedTokenizer = AutoTokenizer.from_pretrained( - tokenizer_name_or_path, - **(tokenizer_init_kwargs or {}), - ) - self.annotation_field_mapping = annotation_field_mapping or dict() - self.partition_layer_name = partition_layer_name - - # target encoding - self.pad_values = { - "input_ids": self.tokenizer.pad_token_id, - "attention_mask": 0, - "labels": self.tokenizer.pad_token_id, - "decoder_attention_mask": 0, - } - self.dtypes = { - "input_ids": torch.int64, - "attention_mask": torch.int64, - "labels": torch.int64, - "decoder_attention_mask": torch.int64, - } - - # logging - self.log_first_n_examples = log_first_n_examples - - @property - def document_type(self) -> Type[TextBasedDocument]: - return self._document_type - - @property - def tokenized_document_type(self) -> Type[TokenBasedDocument]: - return self._tokenized_document_type - - @property - def layer_names(self) -> List[str]: - return [self.target_layer] - - def get_mapped_layer(self, document: Document, layer_name: str) -> AnnotationLayer: - if layer_name in self.annotation_field_mapping: - layer_name = self.annotation_field_mapping[layer_name] - return document[layer_name] - - @property - def generation_config(self) -> Dict[str, Any]: - return {} - - def maybe_log_example( - self, - task_encoding: TaskEncodingType, - targets: Optional[TargetEncodingType] = None, - ) -> None: - if self.log_first_n_examples is not None and self.log_first_n_examples > 0: - inputs = task_encoding.inputs - - logger.info(f"input_ids: {inputs.input_ids}") - logger.info(f"attention_mask: {inputs.attention_mask}") - if targets is not None or task_encoding.has_targets: - targets = targets or task_encoding.targets - logger.info(f"labels: {targets.labels}") - self.log_first_n_examples -= 1 - - def warn_only_once(self, message: str) -> None: - if not hasattr(self, "_warned"): - self._warned: Set[str] = set() - if message not in self._warned: - logger.warning(f"{message} (This warning will only be shown once)") - 
self._warned.add(message) - - def encode_annotations( - self, - layers: Dict[str, AnnotationLayer], - metadata: Optional[Dict[str, Any]] = None, - ) -> TargetEncodingType: - target_annotations = [] - guidance_annotation = ( - metadata.get("guidance_annotation", None) if metadata is not None else None - ) - if guidance_annotation is not None: - if self.guidance_annotation_field is None: - raise ValueError( - "guidance_annotation is available, but guidance_annotation_field is not set" - ) - # filter annotations that belong to the guidance_annotation - for target_annotation in layers[self.target_layer]: - current_guidance_annotation = getattr( - target_annotation, self.guidance_annotation_field - ) - if current_guidance_annotation == guidance_annotation: - target_annotations.append(target_annotation) - else: - target_annotations = layers[self.target_layer] - - if len(target_annotations) == 0: - raise ValueError(f"target_annotations {self.target_layer} contains no annotation") - elif len(target_annotations) > 1: - self.warn_only_once( - f"target_annotations {self.target_layer} contains more than one annotation, " - f"but only the first one will be used" - ) - annotation = target_annotations[0] - if isinstance(annotation, self.target_annotation_type): - text = target_annotations[0].text - else: - raise ValueError( - f"target_annotations {self.target_layer} contains an annotation of type {type(annotation)}, " - f"but expected {self.target_annotation_type}" - ) - encoding = self.tokenizer(text) - return TargetEncodingType( - labels=encoding["input_ids"], decoder_attention_mask=encoding["attention_mask"] - ) - - def decode_annotations( - self, encoding: TaskOutputType, metadata: Optional[Dict[str, Any]] = None - ) -> Tuple[Dict[str, List[Annotation]], Any]: - text = self.tokenizer.decode(encoding.labels, skip_special_tokens=True) - annotation_kwargs = {} - if self.guidance_annotation_field is not None: - if metadata is None: - raise ValueError( - "metadata is required to 
decode annotations with guidance_annotation_field" - ) - guidance_annotation = metadata.get("guidance_annotation", None) - if guidance_annotation is not None: - if self.guidance_annotation_field is None: - raise ValueError( - "guidance_annotation is available, but guidance_annotation_field is not set" - ) - annotation_kwargs[self.guidance_annotation_field] = guidance_annotation - - decoded_layers = { - self.target_layer: [self.target_annotation_type(text=text, **annotation_kwargs)] - } - # no error collection yet - errors: Dict[str, Any] = {} - return decoded_layers, errors - - def tokenize_document( - self, document: DocumentType, source_text: Optional[str] = None - ) -> List[TokenBasedDocument]: - field_mapping = dict(self.annotation_field_mapping) - if self.partition_layer_name is not None: - field_mapping[self.partition_layer_name] = "labeled_partitions" - partition_layer = "labeled_partitions" - else: - partition_layer = None - casted_document = document.as_type(self.document_type, field_mapping=field_mapping) - - tokenizer_kwargs = dict(self.tokenizer_kwargs) - if source_text is not None: - tokenizer_kwargs["text"] = source_text - tokenized_docs = tokenize_document( - casted_document, - tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - partition_layer=partition_layer, - **tokenizer_kwargs, - ) - for idx, tokenized_doc in enumerate(tokenized_docs): - tokenized_doc.id = f"{document.id}-tokenized-{idx+1}-of-{len(tokenized_docs)}" - - return tokenized_docs - - def encode_input( - self, document: DocumentType, is_training: bool = False - ) -> Optional[Union[TaskEncodingType, Sequence[TaskEncodingType]]]: - task_encodings: List[TaskEncodingType] = [] - if self.guidance_layer is None: - guidance_annotations = [None] - else: - guidance_annotations = document[self.guidance_layer] - for guidance_annotation in guidance_annotations: - source_text = None - if guidance_annotation is not None: - # Here could also more sophisticated logic be 
implemented - source_text = guidance_annotation.text - tokenized_docs = self.tokenize_document(document, source_text=source_text) - for tokenized_doc in tokenized_docs: - tokenizer_encoding = tokenized_doc.metadata["tokenizer_encoding"] - task_encodings.append( - TaskEncoding( - document=document, - inputs=InputEncodingType( - input_ids=tokenizer_encoding.ids, - attention_mask=tokenizer_encoding.attention_mask, - ), - metadata={ - "tokenized_document": tokenized_doc, - "guidance_annotation": guidance_annotation, - }, - ) - ) - - return task_encodings - - def encode_target(self, task_encoding: TaskEncodingType) -> Optional[TargetEncodingType]: - document = task_encoding.metadata["tokenized_document"] - guidance_annotation = task_encoding.metadata["guidance_annotation"] - - layers = { - layer_name: self.get_mapped_layer(document, layer_name=layer_name) - for layer_name in self.layer_names - } - result = self.encode_annotations( - layers=layers, - metadata={**task_encoding.metadata, "guidance_annotation": guidance_annotation}, - ) - - self.maybe_log_example(task_encoding=task_encoding, targets=result) - return result - - def collate(self, task_encodings: Sequence[TaskEncodingType]) -> TaskBatchEncoding: - if len(task_encodings) == 0: - raise ValueError("no task_encodings available") - inputs = InputEncodingType.batch( - values=[x.inputs for x in task_encodings], - dtypes=self.dtypes, - pad_values=self.pad_values, - ) - - targets = None - if task_encodings[0].has_targets: - targets = TargetEncodingType.batch( - values=[x.targets for x in task_encodings], - dtypes=self.dtypes, - pad_values=self.pad_values, - ) - - return inputs, targets - - def unbatch_output(self, model_output: ModelBatchOutput) -> Sequence[TaskOutputType]: - labels = model_output["labels"] - batch_size = labels.size(0) - - # We use the position after the first eos token as the seq_len. 
- # Note that, if eos_id is not in model_output for a given batch item, the result will be - # model_output.size(1) + 1 (i.e. seq_len + 1) for that batch item. This is fine, because we use the - # seq_lengths just to truncate the output and want to keep everything if eos_id is not present. - seq_lengths = get_first_occurrence_index(labels, self.tokenizer.eos_token_id) + 1 - - result = [ - TaskOutputType(labels[i, : seq_lengths[i]].to(device="cpu").tolist()) - for i in range(batch_size) - ] - return result - - def create_annotations_from_output( - self, - task_encoding: TaskEncodingType, - task_output: TaskOutputType, - ) -> Iterator[Tuple[str, Annotation]]: - layers, errors = self.decode_annotations( - encoding=task_output, metadata=task_encoding.metadata - ) - tokenized_document = task_encoding.metadata["tokenized_document"] - - # Note: token_based_document_to_text_based() does not yet consider predictions, so we need to clear - # the main annotations and attach the predictions to that - for layer_name, annotations in layers.items(): - layer = self.get_mapped_layer(tokenized_document, layer_name=layer_name) - layer.clear() - layer.extend(annotations) - - untokenized_document = token_based_document_to_text_based( - tokenized_document, result_document_type=self.document_type - ) - - for layer_name in layers: - annotations = self.get_mapped_layer(untokenized_document, layer_name=layer_name) - for annotation in annotations: - yield layer_name, annotation.copy() - - def configure_model_generation(self) -> Optional[Dict[str, Any]]: - # we do not set any overrides here, because we want to use the default generation config as - # it is derived from the Huggingface base model config.json - return {} - - def configure_model_metric(self, stage: str) -> Optional[Metric]: - if self.text_metric_type is None: - return None - - return WrappedMetricWithPrepareFunction( - metric=self.text_metric_type(), - prepare_function=partial(unbatch_and_untokenize, taskmodule=self), - 
prepare_does_unbatch=True, - ) diff --git a/tests/document/processing/test_relation_argument_sorter.py b/tests/document/processing/test_relation_argument_sorter.py index fec8e1200..2068384ea 100644 --- a/tests/document/processing/test_relation_argument_sorter.py +++ b/tests/document/processing/test_relation_argument_sorter.py @@ -104,7 +104,7 @@ def test_get_args_wrong_type(document_with_nary_relation): == "relation NaryRelation(arguments=(LabeledSpan(start=0, end=8, label='PER', score=1.0), " "LabeledSpan(start=18, end=19, label='ORG', score=1.0), LabeledSpan(start=33, end=34, " "label='ORG', score=1.0)), roles=('person', 'worksAt', 'founded'), label='event', score=1.0) " - "has unknown type [], cannot get arguments from it" + "has unknown type [], cannot get arguments from it" ) @@ -122,7 +122,7 @@ def test_construct_relation_with_new_args_wrong_type(document_with_nary_relation == "original relation NaryRelation(arguments=(LabeledSpan(start=0, end=8, label='PER', score=1.0), " "LabeledSpan(start=18, end=19, label='ORG', score=1.0), LabeledSpan(start=33, end=34, label='ORG', " "score=1.0)), roles=('person', 'worksAt', 'founded'), label='event', score=1.0) has unknown type " - "[], cannot reconstruct it with new arguments" + "[], cannot reconstruct it with new arguments" ) diff --git a/tests/document/processing/test_tokenization.py b/tests/document/processing/test_tokenization.py index c940be177..969fa06ed 100644 --- a/tests/document/processing/test_tokenization.py +++ b/tests/document/processing/test_tokenization.py @@ -559,7 +559,7 @@ class WrongAnnotationType(TextBasedDocument): assert ( str(excinfo.value) == "can not convert layers that target the text but contain non-span annotations, " - "but found " + "but found " ) @@ -650,7 +650,7 @@ class WrongAnnotationType(TokenBasedDocument): assert ( str(excinfo.value) == "can not convert layers that target the tokens but contain non-span annotations, " - "but found " + "but found " ) diff --git 
a/tests/metrics/test_span_length_collector.py b/tests/metrics/test_span_length_collector.py index dcd69af01..9a071c04e 100644 --- a/tests/metrics/test_span_length_collector.py +++ b/tests/metrics/test_span_length_collector.py @@ -168,5 +168,5 @@ class TestDocument(TextBasedDocument): statistic(doc) assert ( str(excinfo.value) - == "span length calculation is not yet supported for " + == "span length calculation is not yet supported for " ) diff --git a/tests/models/__init__.py b/tests/models/__init__.py deleted file mode 100644 index 1e3e58002..000000000 --- a/tests/models/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -def trunc_number(x: float, n: int) -> float: - return int(x * 10**n) / 10**n diff --git a/tests/models/base_models/__init__.py b/tests/models/base_models/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/models/base_models/test_bart_as_pointer_network.py b/tests/models/base_models/test_bart_as_pointer_network.py deleted file mode 100644 index 554e52108..000000000 --- a/tests/models/base_models/test_bart_as_pointer_network.py +++ /dev/null @@ -1,983 +0,0 @@ -import pytest -import torch -from transformers import ( - BartModel, - BeamSearchScorer, - LogitsProcessorList, - MinLengthLogitsProcessor, -) -from transformers.generation import BeamSearchEncoderDecoderOutput - -from pie_modules.models.base_models import ( - BartAsPointerNetwork, - BartModelWithDecoderPositionIds, -) -from tests import _config_to_str -from tests.models import trunc_number - -# this is a small model that can be used for testing -MODEL_NAME_OR_PATH = "sshleifer/bart-tiny-random" -DECODER_POSITION_ID_PATTERN = [0, 0, 1, 0, 0, 1, 1] -CONFIGS = [{}, {"decoder_position_id_mode": "pattern"}] -CONFIG_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIG_DICT.keys()) -def config_str(request): - return request.param - - -@pytest.fixture(scope="module") -def config(config_str): - return CONFIG_DICT[config_str] - 
- -@pytest.fixture(scope="module") -def document(): - from pie_modules.annotations import BinaryRelation, LabeledSpan - from pie_modules.documents import ( - TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions, - ) - - doc = TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions( - text="This is a dummy text about nothing. Trust me." - ) - span1 = LabeledSpan(start=10, end=20, label="content") - span2 = LabeledSpan(start=27, end=34, label="topic") - span3 = LabeledSpan(start=42, end=44, label="person") - doc.labeled_spans.extend([span1, span2, span3]) - assert str(span1) == "dummy text" - assert str(span2) == "nothing" - assert str(span3) == "me" - rel = BinaryRelation(head=span1, tail=span2, label="is_about") - doc.binary_relations.append(rel) - assert str(rel.label) == "is_about" - assert str(rel.head) == "dummy text" - assert str(rel.tail) == "nothing" - - sent1 = LabeledSpan(start=0, end=35, label="1") - sent2 = LabeledSpan(start=36, end=45, label="2") - doc.labeled_partitions.extend([sent1, sent2]) - assert str(sent1) == "This is a dummy text about nothing." - assert str(sent2) == "Trust me." 
- return doc - - -@pytest.fixture(scope="module") -def taskmodule(document): - from pie_modules.taskmodules import PointerNetworkTaskModuleForEnd2EndRE - - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path=MODEL_NAME_OR_PATH, - partition_layer_name="labeled_partitions", - create_constraints=True, - ) - - taskmodule.prepare(documents=[document]) - - return taskmodule - - -@pytest.fixture(scope="module") -def model(config) -> BartAsPointerNetwork: - model_name_or_path = MODEL_NAME_OR_PATH - - torch.random.manual_seed(42) - model = BartAsPointerNetwork.from_pretrained( - model_name_or_path, - # label id space - bos_token_id=0, # taskmodule.bos_id, - eos_token_id=1, # taskmodule.eos_id, - pad_token_id=1, # taskmodule.eos_id, - # target token id space - target_token_ids=[0, 2, 50266, 50269, 50268, 50265, 50267], # taskmodule.target_token_ids, - # mapping to better initialize the label embedding weights - # taken from taskmodule.label_embedding_weight_mapping - embedding_weight_mapping={ - 50266: [39763], - 50269: [10166], - 50268: [5970], - 50265: [45260], - 50267: [354, 1215, 9006], - }, - decoder_position_id_pattern=DECODER_POSITION_ID_PATTERN, - **config, - ) - - return model - - -def test_model(model, config): - assert model is not None - named_parameters = dict(model.named_parameters()) - parameter_means = {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "model.shared.weight": -1.41e-05, - "model.encoder.embed_positions.weight": -0.0001324, - "model.encoder.layers.0.self_attn.k_proj.weight": -0.0004574, - "model.encoder.layers.0.self_attn.k_proj.bias": 0.0, - "model.encoder.layers.0.self_attn.v_proj.weight": -0.0005457, - "model.encoder.layers.0.self_attn.v_proj.bias": 0.0, - "model.encoder.layers.0.self_attn.q_proj.weight": -0.0009775, - "model.encoder.layers.0.self_attn.q_proj.bias": 0.0, - "model.encoder.layers.0.self_attn.out_proj.weight": -0.0001075, - 
"model.encoder.layers.0.self_attn.out_proj.bias": 0.0, - "model.encoder.layers.0.self_attn_layer_norm.weight": 1.0, - "model.encoder.layers.0.self_attn_layer_norm.bias": 0.0, - "model.encoder.layers.0.fc1.weight": -0.0008655, - "model.encoder.layers.0.fc1.bias": 0.0, - "model.encoder.layers.0.fc2.weight": 0.0015535, - "model.encoder.layers.0.fc2.bias": 0.0, - "model.encoder.layers.0.final_layer_norm.weight": 1.0, - "model.encoder.layers.0.final_layer_norm.bias": 0.0, - "model.encoder.layers.1.self_attn.k_proj.weight": -0.0007831, - "model.encoder.layers.1.self_attn.k_proj.bias": 0.0, - "model.encoder.layers.1.self_attn.v_proj.weight": 0.0001186, - "model.encoder.layers.1.self_attn.v_proj.bias": 0.0, - "model.encoder.layers.1.self_attn.q_proj.weight": 0.0006847, - "model.encoder.layers.1.self_attn.q_proj.bias": 0.0, - "model.encoder.layers.1.self_attn.out_proj.weight": 0.0011724, - "model.encoder.layers.1.self_attn.out_proj.bias": 0.0, - "model.encoder.layers.1.self_attn_layer_norm.weight": 1.0, - "model.encoder.layers.1.self_attn_layer_norm.bias": 0.0, - "model.encoder.layers.1.fc1.weight": 0.0007757, - "model.encoder.layers.1.fc1.bias": 0.0, - "model.encoder.layers.1.fc2.weight": -0.0002014, - "model.encoder.layers.1.fc2.bias": 0.0, - "model.encoder.layers.1.final_layer_norm.weight": 1.0, - "model.encoder.layers.1.final_layer_norm.bias": 0.0, - "model.encoder.layernorm_embedding.weight": 1.0, - "model.encoder.layernorm_embedding.bias": 0.0, - "model.decoder.embed_positions.weight": -0.0001275, - "model.decoder.layers.0.self_attn.k_proj.weight": -0.0010682, - "model.decoder.layers.0.self_attn.k_proj.bias": 0.0, - "model.decoder.layers.0.self_attn.v_proj.weight": 0.0005057, - "model.decoder.layers.0.self_attn.v_proj.bias": 0.0, - "model.decoder.layers.0.self_attn.q_proj.weight": 0.0003248, - "model.decoder.layers.0.self_attn.q_proj.bias": 0.0, - "model.decoder.layers.0.self_attn.out_proj.weight": -0.0002014, - "model.decoder.layers.0.self_attn.out_proj.bias": 0.0, - 
"model.decoder.layers.0.self_attn_layer_norm.weight": 1.0, - "model.decoder.layers.0.self_attn_layer_norm.bias": 0.0, - "model.decoder.layers.0.encoder_attn.k_proj.weight": -0.0004254, - "model.decoder.layers.0.encoder_attn.k_proj.bias": 0.0, - "model.decoder.layers.0.encoder_attn.v_proj.weight": -0.0004049, - "model.decoder.layers.0.encoder_attn.v_proj.bias": 0.0, - "model.decoder.layers.0.encoder_attn.q_proj.weight": -0.0003516, - "model.decoder.layers.0.encoder_attn.q_proj.bias": 0.0, - "model.decoder.layers.0.encoder_attn.out_proj.weight": 0.0009908, - "model.decoder.layers.0.encoder_attn.out_proj.bias": 0.0, - "model.decoder.layers.0.encoder_attn_layer_norm.weight": 1.0, - "model.decoder.layers.0.encoder_attn_layer_norm.bias": 0.0, - "model.decoder.layers.0.fc1.weight": 0.0008378, - "model.decoder.layers.0.fc1.bias": 0.0, - "model.decoder.layers.0.fc2.weight": -2e-05, - "model.decoder.layers.0.fc2.bias": 0.0, - "model.decoder.layers.0.final_layer_norm.weight": 1.0, - "model.decoder.layers.0.final_layer_norm.bias": 0.0, - "model.decoder.layers.1.self_attn.k_proj.weight": -0.0007669, - "model.decoder.layers.1.self_attn.k_proj.bias": 0.0, - "model.decoder.layers.1.self_attn.v_proj.weight": -0.0007123, - "model.decoder.layers.1.self_attn.v_proj.bias": 0.0, - "model.decoder.layers.1.self_attn.q_proj.weight": 0.0012958, - "model.decoder.layers.1.self_attn.q_proj.bias": 0.0, - "model.decoder.layers.1.self_attn.out_proj.weight": -0.0006818, - "model.decoder.layers.1.self_attn.out_proj.bias": 0.0, - "model.decoder.layers.1.self_attn_layer_norm.weight": 1.0, - "model.decoder.layers.1.self_attn_layer_norm.bias": 0.0, - "model.decoder.layers.1.encoder_attn.k_proj.weight": -0.0006906, - "model.decoder.layers.1.encoder_attn.k_proj.bias": 0.0, - "model.decoder.layers.1.encoder_attn.v_proj.weight": -0.0009213, - "model.decoder.layers.1.encoder_attn.v_proj.bias": 0.0, - "model.decoder.layers.1.encoder_attn.q_proj.weight": -0.000842, - 
"model.decoder.layers.1.encoder_attn.q_proj.bias": 0.0, - "model.decoder.layers.1.encoder_attn.out_proj.weight": 0.0008073, - "model.decoder.layers.1.encoder_attn.out_proj.bias": 0.0, - "model.decoder.layers.1.encoder_attn_layer_norm.weight": 1.0, - "model.decoder.layers.1.encoder_attn_layer_norm.bias": 0.0, - "model.decoder.layers.1.fc1.weight": 0.0015493, - "model.decoder.layers.1.fc1.bias": 0.0, - "model.decoder.layers.1.fc2.weight": -0.0009827, - "model.decoder.layers.1.fc2.bias": 0.0, - "model.decoder.layers.1.final_layer_norm.weight": 1.0, - "model.decoder.layers.1.final_layer_norm.bias": 0.0, - "model.decoder.layernorm_embedding.weight": 1.0, - "model.decoder.layernorm_embedding.bias": 0.0, - "pointer_head.encoder_mlp.0.weight": 0.0004805, - "pointer_head.encoder_mlp.0.bias": 0.0, - "pointer_head.encoder_mlp.3.weight": 0.0001837, - "pointer_head.encoder_mlp.3.bias": 0.0, - } - assert parameter_means == parameter_means_expected - assert isinstance(model, BartAsPointerNetwork) - if config == {}: - assert isinstance(model.model, BartModel) - elif config == {"decoder_position_id_mode": "pattern"}: - assert isinstance(model.model, BartModelWithDecoderPositionIds) - else: - raise ValueError(f"Unknown config: {config}") - - -@pytest.fixture(scope="module") -def batch(): - inputs = { - "input_ids": torch.tensor( - [ - [0, 713, 16, 10, 34759, 2788, 59, 1085, 4, 2], - [0, 18823, 162, 4, 2, 1, 1, 1, 1, 1], - ] - ), - "attention_mask": torch.tensor( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]] - ), - } - targets = { - "labels": torch.tensor([[14, 14, 5, 11, 12, 3, 6, 1], [9, 9, 4, 2, 2, 2, 2, 1]]), - "decoder_attention_mask": torch.tensor( - [[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]] - ), - } - return inputs, targets - - -@pytest.fixture(scope="module") -def batch_with_constraints(batch): - constraints = torch.tensor( - [ - [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - 
[0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ], - [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, -1, -1, -1, -1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - ], - ] - ) - targets_with_constraints = {**batch[1], "constraints": constraints} - return batch[0], targets_with_constraints - - -@pytest.mark.skip(reason="This is just to show how to create the batch.") -def test_batch_with_constraints(batch_with_constraints, taskmodule, document): - inputs, targets = batch_with_constraints - task_encodings = taskmodule.encode([document], encode_target=True) - batch_from_documents = taskmodule.collate(task_encodings) - inputs_from_documents, targets_from_documents = batch_from_documents - for key in inputs: - torch.testing.assert_close(inputs[key], inputs_from_documents[key]) - - for key in targets: - torch.testing.assert_close(targets[key], targets_from_documents[key]) - - -@pytest.fixture(scope="module") -def decoder_input_ids(model): - # taken from batch[1]["labels"] - labels = torch.tensor([[14, 14, 5, 11, 12, 3, 6, 1], [9, 9, 4, 2, 2, 2, 2, 1]]) - decoder_input_ids = model.prepare_decoder_input_ids_from_labels(labels=labels) - return decoder_input_ids - - -def test_prepare_decoder_input_ids_from_labels(decoder_input_ids): - assert decoder_input_ids.shape == (2, 8) - torch.testing.assert_close( - decoder_input_ids, - 
torch.tensor([[0, 14, 14, 5, 11, 12, 3, 6], [0, 9, 9, 4, 2, 2, 2, 2]]), - ) - - -def test_forward(model, batch, decoder_input_ids, config): - inputs, targets = batch - torch.manual_seed(42) - outputs = model(**inputs, decoder_input_ids=decoder_input_ids) - assert outputs.loss is None - assert outputs.logits is not None - # shape: (batch_size, output_seq_len, target_size=num_target_ids+num_offsets) - assert outputs.logits.shape == (2, 8, 17) - # check exact values only for the first sequence output - torch.testing.assert_close( - outputs.logits[:, 0, :], - torch.tensor( - [ - [ - -1.0000000138484279e24, - -0.23238050937652588, - 0.2958170175552368, - 0.05529244244098663, - 0.04253090173006058, - 0.10081345587968826, - -0.07145103067159653, - 0.12317530065774918, - -0.06861806660890579, - 0.07819556444883347, - 0.006490768864750862, - -0.040455855429172516, - 0.03176971897482872, - 0.05362509936094284, - 0.04528001323342323, - -0.0684177577495575, - -1.0000000331813535e32, - ], - [ - -1.0000000138484279e24, - -0.23274855315685272, - 0.2960396707057953, - 0.05556505173444748, - 0.04273710399866104, - 0.10071954131126404, - -0.071356862783432, - 0.12314081937074661, - 0.06498698145151138, - 0.07938676327466965, - -0.07943986356258392, - -1.0000000331813535e32, - -1.0000000331813535e32, - -1.0000000331813535e32, - -1.0000000331813535e32, - -1.0000000331813535e32, - -1.0000000331813535e32, - ], - ] - ), - ) - # check the sum of all logits - if config == {}: - torch.testing.assert_close( - outputs.logits.sum(0).sum(0), - torch.tensor( - [ - -1.6000000221574846e25, - -0.9064984321594238, - 1.189674735069275, - 0.9796359539031982, - 0.1837124526500702, - 1.3070943355560303, - -0.1210818886756897, - 0.5316579937934875, - -0.12306825071573257, - 0.6218758225440979, - -0.4374474287033081, - -8.000000265450828e32, - -8.000000265450828e32, - -8.000000265450828e32, - -8.000000265450828e32, - -8.000000265450828e32, - -1.6000000530901656e33, - ] - ), - ) - elif config == 
{"decoder_position_id_mode": "pattern"}: - torch.testing.assert_close( - outputs.logits.sum(0).sum(0), - torch.tensor( - [ - -1.6000000221574846e25, - -0.5539568662643433, - 0.7004716396331787, - 1.5720455646514893, - -0.3760950267314911, - 0.7738710641860962, - -0.1090446263551712, - 0.287150502204895, - -0.04344810172915459, - 0.3674442768096924, - -0.6838937997817993, - -8.000000265450828e32, - -8.000000265450828e32, - -8.000000265450828e32, - -8.000000265450828e32, - -8.000000265450828e32, - -1.6000000530901656e33, - ] - ), - ) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_forward_with_labels(model, batch, config): - inputs, targets = batch - targets_without_constraints = { - key: value for key, value in targets.items() if key != "constraints" - } - assert set(inputs) == {"input_ids", "attention_mask"} - assert set(targets_without_constraints) == {"labels", "decoder_attention_mask"} - torch.manual_seed(42) - outputs = model(**inputs, **targets_without_constraints) - loss = outputs.loss - if config == {}: - torch.testing.assert_close(loss, torch.tensor(2.4516539573669434)) - elif config == {"decoder_position_id_mode": "pattern"}: - torch.testing.assert_close(loss, torch.tensor(2.4184868335723877)) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_forward_with_labels_and_constraints(model, batch_with_constraints, config): - inputs, targets = batch_with_constraints - assert set(inputs) == {"input_ids", "attention_mask"} - assert set(targets) == {"labels", "decoder_attention_mask", "constraints"} - torch.manual_seed(42) - outputs = model(**inputs, **targets) - loss = outputs.loss - if config == {}: - torch.testing.assert_close(loss, torch.tensor(4.776531219482422)) - elif config == {"decoder_position_id_mode": "pattern"}: - torch.testing.assert_close(loss, torch.tensor(4.742183685302734)) - else: - raise ValueError(f"Unknown model type {type(model.model)}") - - -@pytest.fixture(scope="module") -def 
empty_decoder_input_ids(batch, model): - inputs, targets = batch - batch_size, seq_len = inputs["input_ids"].shape - decoder_input_ids = torch.ones((batch_size, 1), dtype=torch.long) * model.config.bos_token_id - torch.testing.assert_close( - decoder_input_ids, - torch.tensor([[0], [0]]), - ) - return decoder_input_ids - - -@pytest.fixture(scope="module") -def encoder_outputs(model, batch): - inputs, targets = batch - torch.manual_seed(42) - encoder_outputs = model.get_encoder()( - input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"] - ) - return encoder_outputs - - -@pytest.fixture(scope="module") -def prepared_encoder_decoder_kwargs_for_generation( - model, batch, empty_decoder_input_ids, encoder_outputs -): - model_kwargs = { - "attention_mask": batch[0]["attention_mask"], - "output_attentions": False, - "output_hidden_states": False, - "use_cache": True, - } - torch.manual_seed(42) - prepared_kwargs = model._prepare_encoder_decoder_kwargs_for_generation( - inputs_tensor=batch[0]["input_ids"], - model_kwargs=model_kwargs, - model_input_name="input_ids", - ) - return prepared_kwargs - - -def test_prepare_encoder_decoder_kwargs_for_generation( - prepared_encoder_decoder_kwargs_for_generation, batch, encoder_outputs -): - model_kwargs = { - "attention_mask": batch[0]["attention_mask"], - "output_attentions": False, - "output_hidden_states": False, - "use_cache": True, - } - - assert set(prepared_encoder_decoder_kwargs_for_generation) == set(model_kwargs) | { - "encoder_input_ids", - "encoder_attention_mask", - "encoder_outputs", - } - torch.testing.assert_close( - prepared_encoder_decoder_kwargs_for_generation["encoder_input_ids"], - batch[0]["input_ids"], - ) - torch.testing.assert_close( - prepared_encoder_decoder_kwargs_for_generation["encoder_attention_mask"], - batch[0]["attention_mask"], - ) - torch.testing.assert_close( - prepared_encoder_decoder_kwargs_for_generation["encoder_outputs"].last_hidden_state, - 
encoder_outputs.last_hidden_state, - ) - - -def test_prepare_inputs_for_generation( - model, - prepared_encoder_decoder_kwargs_for_generation, - empty_decoder_input_ids, - batch, - encoder_outputs, - config, -): - result = model.prepare_inputs_for_generation( - decoder_input_ids=empty_decoder_input_ids, **prepared_encoder_decoder_kwargs_for_generation - ) - result_keys = { - "input_ids", - "attention_mask", - "encoder_outputs", - "decoder_input_ids", - "decoder_attention_mask", - "past_key_values", - "use_cache", - "head_mask", - "decoder_head_mask", - "cross_attn_head_mask", - } - if model.pointer_head.use_prepared_position_ids: - result_keys.add("decoder_position_ids") - assert set(result) == result_keys - torch.testing.assert_close( - result["input_ids"], - batch[0]["input_ids"], - ) - torch.testing.assert_close( - result["attention_mask"], - batch[0]["attention_mask"], - ) - torch.testing.assert_close( - result["encoder_outputs"].last_hidden_state, - encoder_outputs.last_hidden_state, - ) - torch.testing.assert_close( - result["decoder_input_ids"], - empty_decoder_input_ids, - ) - assert result["decoder_attention_mask"] is None - assert result["past_key_values"] is None - assert result["use_cache"] is True - assert result["head_mask"] is None - assert result["decoder_head_mask"] is None - assert result["cross_attn_head_mask"] is None - if config == {}: - assert "decoder_position_ids" not in result - elif config == {"decoder_position_id_mode": "pattern"}: - torch.testing.assert_close(result["decoder_position_ids"], torch.tensor([[0], [0]])) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_prepare_inputs_for_generation_with_past_key_values( - model, - prepared_encoder_decoder_kwargs_for_generation, - batch, - encoder_outputs, - config, -): - # shallow copy to avoid changing the original dict - kwargs = dict(prepared_encoder_decoder_kwargs_for_generation) - kwargs["decoder_input_ids"] = torch.tensor( - [ - [0, 8, 9], - [0, 8, 10], - [0, 8, 
15], - [0, 8, 8], - [0, 9, 10], - [0, 8, 12], - [0, 8, 9], - [0, 8, 10], - [0, 9, 10], - [0, 8, 8], - [0, 9, 9], - [0, 8, 6], - ] - ) - # 12 is batch_size (2) * num_beams (6), - # 16 is number of encoder / decoder attention heads, - # 2 is the length of already generated tokens / 10 is the length of the encoder input, - # 64 seems to be the size of the hidden states - dummy_past_key_values = ( - torch.zeros((12, 16, 2, 64)), - torch.zeros((12, 16, 2, 64)), - torch.zeros((12, 16, 10, 64)), - torch.zeros((12, 16, 10, 64)), - ) - - result = model.prepare_inputs_for_generation(past_key_values=dummy_past_key_values, **kwargs) - if config == {}: - assert len(result) == 10 - elif config == {"decoder_position_id_mode": "pattern"}: - assert len(result) == 11 - else: - raise ValueError(f"Unknown config: {config}") - torch.testing.assert_close( - result["input_ids"], - batch[0]["input_ids"], - ) - torch.testing.assert_close( - result["attention_mask"], - batch[0]["attention_mask"], - ) - torch.testing.assert_close( - result["encoder_outputs"].last_hidden_state, - encoder_outputs.last_hidden_state, - ) - torch.testing.assert_close( - result["decoder_input_ids"], - # just the last id for each entry - torch.tensor([[9], [10], [15], [8], [10], [12], [9], [10], [10], [8], [9], [6]]), - ) - assert result["decoder_attention_mask"] is None - assert result["past_key_values"] is dummy_past_key_values - assert result["use_cache"] is True - assert result["head_mask"] is None - assert result["decoder_head_mask"] is None - assert result["cross_attn_head_mask"] is None - if "decoder_position_ids" in result: - torch.testing.assert_close( - result["decoder_position_ids"], - # originally this was 0 from the pattern, but got shifted for the position-bos and position-pad indices - torch.tensor([[2], [2], [2], [2], [2], [2], [2], [2], [2], [2], [2], [2]]), - ) - - -def test_generate(model, batch, empty_decoder_input_ids, config): - inputs, targets = batch - batch_size, seq_len = 
inputs["input_ids"].shape - torch.manual_seed(42) - outputs = model.generate(**inputs) - if config == {}: - assert outputs.shape == (batch_size, 20) # note that 20 is the model.config.max_length - torch.testing.assert_close( - outputs, - torch.tensor( - [ - [0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], - [0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], - ] - ), - ) - elif config == {"decoder_position_id_mode": "pattern"}: - assert outputs.shape == (batch_size, 20) # note that 20 is the model.config.max_length - torch.testing.assert_close( - outputs, - torch.tensor( - [ - [0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], - [0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], - ] - ), - ) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_head_named_params(model): - parameter_shapes = {name: tuple(param.shape) for name, param in model.head_named_params()} - assert parameter_shapes == { - "pointer_head.encoder_mlp.0.bias": (24,), - "pointer_head.encoder_mlp.0.weight": (24, 24), - "pointer_head.encoder_mlp.3.bias": (24,), - "pointer_head.encoder_mlp.3.weight": (24, 24), - } - - -def test_encoder_only_named_params(model): - parameter_shapes = { - name: tuple(param.shape) for name, param in model.encoder_only_named_params() - } - assert len(parameter_shapes) == 35 - assert parameter_shapes == { - "model.encoder.embed_positions.weight": (1026, 24), - "model.encoder.layernorm_embedding.bias": (24,), - "model.encoder.layernorm_embedding.weight": (24,), - "model.encoder.layers.0.fc1.bias": (16,), - "model.encoder.layers.0.fc1.weight": (16, 24), - "model.encoder.layers.0.fc2.bias": (24,), - "model.encoder.layers.0.fc2.weight": (24, 16), - "model.encoder.layers.0.final_layer_norm.bias": (24,), - "model.encoder.layers.0.final_layer_norm.weight": (24,), - "model.encoder.layers.0.self_attn.k_proj.bias": (24,), - "model.encoder.layers.0.self_attn.k_proj.weight": (24, 24), - 
"model.encoder.layers.0.self_attn.out_proj.bias": (24,), - "model.encoder.layers.0.self_attn.out_proj.weight": (24, 24), - "model.encoder.layers.0.self_attn.q_proj.bias": (24,), - "model.encoder.layers.0.self_attn.q_proj.weight": (24, 24), - "model.encoder.layers.0.self_attn.v_proj.bias": (24,), - "model.encoder.layers.0.self_attn.v_proj.weight": (24, 24), - "model.encoder.layers.0.self_attn_layer_norm.bias": (24,), - "model.encoder.layers.0.self_attn_layer_norm.weight": (24,), - "model.encoder.layers.1.fc1.bias": (16,), - "model.encoder.layers.1.fc1.weight": (16, 24), - "model.encoder.layers.1.fc2.bias": (24,), - "model.encoder.layers.1.fc2.weight": (24, 16), - "model.encoder.layers.1.final_layer_norm.bias": (24,), - "model.encoder.layers.1.final_layer_norm.weight": (24,), - "model.encoder.layers.1.self_attn.k_proj.bias": (24,), - "model.encoder.layers.1.self_attn.k_proj.weight": (24, 24), - "model.encoder.layers.1.self_attn.out_proj.bias": (24,), - "model.encoder.layers.1.self_attn.out_proj.weight": (24, 24), - "model.encoder.layers.1.self_attn.q_proj.bias": (24,), - "model.encoder.layers.1.self_attn.q_proj.weight": (24, 24), - "model.encoder.layers.1.self_attn.v_proj.bias": (24,), - "model.encoder.layers.1.self_attn.v_proj.weight": (24, 24), - "model.encoder.layers.1.self_attn_layer_norm.bias": (24,), - "model.encoder.layers.1.self_attn_layer_norm.weight": (24,), - } - - -def test_decoder_only_named_params(model): - parameter_shapes = { - name: tuple(param.shape) for name, param in model.decoder_only_named_params() - } - assert len(parameter_shapes) == 55 - assert parameter_shapes == { - "model.decoder.embed_positions.weight": (1026, 24), - "model.decoder.layernorm_embedding.bias": (24,), - "model.decoder.layernorm_embedding.weight": (24,), - "model.decoder.layers.0.encoder_attn.k_proj.bias": (24,), - "model.decoder.layers.0.encoder_attn.k_proj.weight": (24, 24), - "model.decoder.layers.0.encoder_attn.out_proj.bias": (24,), - 
"model.decoder.layers.0.encoder_attn.out_proj.weight": (24, 24), - "model.decoder.layers.0.encoder_attn.q_proj.bias": (24,), - "model.decoder.layers.0.encoder_attn.q_proj.weight": (24, 24), - "model.decoder.layers.0.encoder_attn.v_proj.bias": (24,), - "model.decoder.layers.0.encoder_attn.v_proj.weight": (24, 24), - "model.decoder.layers.0.encoder_attn_layer_norm.bias": (24,), - "model.decoder.layers.0.encoder_attn_layer_norm.weight": (24,), - "model.decoder.layers.0.fc1.bias": (16,), - "model.decoder.layers.0.fc1.weight": (16, 24), - "model.decoder.layers.0.fc2.bias": (24,), - "model.decoder.layers.0.fc2.weight": (24, 16), - "model.decoder.layers.0.final_layer_norm.bias": (24,), - "model.decoder.layers.0.final_layer_norm.weight": (24,), - "model.decoder.layers.0.self_attn.k_proj.bias": (24,), - "model.decoder.layers.0.self_attn.k_proj.weight": (24, 24), - "model.decoder.layers.0.self_attn.out_proj.bias": (24,), - "model.decoder.layers.0.self_attn.out_proj.weight": (24, 24), - "model.decoder.layers.0.self_attn.q_proj.bias": (24,), - "model.decoder.layers.0.self_attn.q_proj.weight": (24, 24), - "model.decoder.layers.0.self_attn.v_proj.bias": (24,), - "model.decoder.layers.0.self_attn.v_proj.weight": (24, 24), - "model.decoder.layers.0.self_attn_layer_norm.bias": (24,), - "model.decoder.layers.0.self_attn_layer_norm.weight": (24,), - "model.decoder.layers.1.encoder_attn.k_proj.bias": (24,), - "model.decoder.layers.1.encoder_attn.k_proj.weight": (24, 24), - "model.decoder.layers.1.encoder_attn.out_proj.bias": (24,), - "model.decoder.layers.1.encoder_attn.out_proj.weight": (24, 24), - "model.decoder.layers.1.encoder_attn.q_proj.bias": (24,), - "model.decoder.layers.1.encoder_attn.q_proj.weight": (24, 24), - "model.decoder.layers.1.encoder_attn.v_proj.bias": (24,), - "model.decoder.layers.1.encoder_attn.v_proj.weight": (24, 24), - "model.decoder.layers.1.encoder_attn_layer_norm.bias": (24,), - "model.decoder.layers.1.encoder_attn_layer_norm.weight": (24,), - 
"model.decoder.layers.1.fc1.bias": (16,), - "model.decoder.layers.1.fc1.weight": (16, 24), - "model.decoder.layers.1.fc2.bias": (24,), - "model.decoder.layers.1.fc2.weight": (24, 16), - "model.decoder.layers.1.final_layer_norm.bias": (24,), - "model.decoder.layers.1.final_layer_norm.weight": (24,), - "model.decoder.layers.1.self_attn.k_proj.bias": (24,), - "model.decoder.layers.1.self_attn.k_proj.weight": (24, 24), - "model.decoder.layers.1.self_attn.out_proj.bias": (24,), - "model.decoder.layers.1.self_attn.out_proj.weight": (24, 24), - "model.decoder.layers.1.self_attn.q_proj.bias": (24,), - "model.decoder.layers.1.self_attn.q_proj.weight": (24, 24), - "model.decoder.layers.1.self_attn.v_proj.bias": (24,), - "model.decoder.layers.1.self_attn.v_proj.weight": (24, 24), - "model.decoder.layers.1.self_attn_layer_norm.bias": (24,), - "model.decoder.layers.1.self_attn_layer_norm.weight": (24,), - } - - -def test_encoder_decoder_shared_named_params(model): - parameter_shapes = { - name: tuple(param.shape) for name, param in model.encoder_decoder_shared_named_params() - } - assert len(parameter_shapes) == 1 - assert parameter_shapes == {"model.shared.weight": (50270, 24)} - - -def test_base_model_named_params(model): - parameter_shapes = { - name: tuple(param.shape) for name, param in model.base_model_named_params() - } - assert len(parameter_shapes) == 91 - encoder_only_parameter_shapes = { - name: tuple(param.shape) for name, param in model.encoder_only_named_params() - } - decoder_only_parameter_shapes = { - name: tuple(param.shape) for name, param in model.decoder_only_named_params() - } - shared_parameter_shapes = { - name: tuple(param.shape) for name, param in model.encoder_decoder_shared_named_params() - } - expected_parameter_shapes = { - **encoder_only_parameter_shapes, - **decoder_only_parameter_shapes, - **shared_parameter_shapes, - } - - assert parameter_shapes == expected_parameter_shapes - - -def test_configure_optimizer(model): - optimizer = 
model.configure_optimizer() - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults["lr"] == 0.001 - assert optimizer.defaults["weight_decay"] == model.config.weight_decay == 0.01 - assert len(optimizer.param_groups) == 6 - assert all(param_group["lr"] == model.config.lr for param_group in optimizer.param_groups) - - # head parameters - assert optimizer.param_groups[0]["weight_decay"] == model.config.weight_decay == 0.01 - # decoder only layer norm parameters - assert optimizer.param_groups[1]["weight_decay"] == model.config.weight_decay == 0.01 - # decoder only other parameters - assert optimizer.param_groups[2]["weight_decay"] == model.config.weight_decay == 0.01 - # encoder only layer norm parameters - assert ( - optimizer.param_groups[3]["weight_decay"] == model.config.encoder_layer_norm_decay == 0.001 - ) - # encoder only other parameters - assert optimizer.param_groups[4]["weight_decay"] == model.config.weight_decay == 0.01 - # encoder-decoder shared parameters - assert optimizer.param_groups[5]["weight_decay"] == model.config.weight_decay == 0.01 - - all_optimized_parameters = set() - for param_group in optimizer.param_groups: - all_optimized_parameters.update(set(param_group["params"])) - assert len(all_optimized_parameters) > 0 - # check that all model parameters are covered - all_model_parameters = {param for name, param in model.named_parameters()} - assert all_optimized_parameters == all_model_parameters - - -# note that this is only used for the tests below which are marked as slow -# and are primarily meant to show how beam search works -@pytest.fixture(scope="module") -def pretrained_model() -> BartAsPointerNetwork: - torch.random.manual_seed(42) - model = BartAsPointerNetwork.from_pretrained( - "sshleifer/distilbart-xsum-12-1", - # label id space - bos_token_id=0, # taskmodule.bos_id, - eos_token_id=1, # taskmodule.eos_id, - pad_token_id=1, # taskmodule.eos_id, - # target token id space - target_token_ids=[0, 2, 50266, 50269, 
50268, 50265, 50267], # taskmodule.target_token_ids, - # mapping to better initialize the label embedding weights - # taken from taskmodule.label_embedding_weight_mapping - embedding_weight_mapping={ - 50266: [39763], - 50269: [10166], - 50268: [5970], - 50265: [45260], - 50267: [354, 1215, 9006], - }, - decoder_position_id_mode="pattern", - decoder_position_id_pattern=[0, 0, 1, 0, 0, 1, 1], - ) - - return model - - -ARTICLE_TO_SUMMARIZE = ( - "PG&E stated it scheduled the blackouts in response to forecasts for high winds " - "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were " - "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow." -) - - -@pytest.mark.slow -def test_bart_pointer_network_beam_search(pretrained_model, taskmodule): - model = pretrained_model - encoder_input_str = ARTICLE_TO_SUMMARIZE # "translate English to German: How old are you?" - encoder_input_tokenized = taskmodule.tokenizer(encoder_input_str, return_tensors="pt") - - # lets run beam search using 3 beams - num_beams = 3 - # define decoder start token ids - decoder_input_ids = torch.ones((num_beams, 1), device=model.device, dtype=torch.long) - decoder_input_ids = decoder_input_ids * model.config.decoder_start_token_id - - # add encoder_outputs to model keyword arguments - encoder = model.get_encoder() - encoder_input_ids = encoder_input_tokenized.input_ids.repeat_interleave(num_beams, dim=0) - encoder_attention_mask = encoder_input_tokenized.attention_mask.repeat_interleave( - num_beams, dim=0 - ) - torch.manual_seed(42) - encoder_outputs = encoder(encoder_input_ids, return_dict=True) - model_kwargs = { - "encoder_outputs": encoder_outputs, - "encoder_input_ids": encoder_input_ids, - "encoder_attention_mask": encoder_attention_mask, - } - - # instantiate beam scorer - beam_scorer = BeamSearchScorer( - batch_size=1, - num_beams=num_beams, - device=model.device, - ) - - # instantiate logits 
processors - logits_processor = LogitsProcessorList( - [ - MinLengthLogitsProcessor(5, eos_token_id=model.config.eos_token_id), - ] - ) - - torch.manual_seed(42) - outputs = model.beam_search( - decoder_input_ids, - beam_scorer, - logits_processor=logits_processor, - pad_token_id=model.config.pad_token_id, - eos_token_id=model.config.eos_token_id, - max_length=20, - **model_kwargs, - ) - - torch.testing.assert_close( - outputs, - torch.tensor( - [[0, 10, 30, 53, 54, 45, 15, 16, 17, 33, 33, 33, 35, 33, 58, 39, 41, 35, 33, 35]] - ), - ) - - # result = tokenizer.batch_decode(outputs, skip_special_tokens=True) - # assert result == [ - # " power lines in California have been shut down after a power provider said it was due to high winds." - # ] - - -@pytest.mark.slow -def test_bart_pointer_network_generate_with_scores(pretrained_model, taskmodule): - model = pretrained_model - encoder_input_str = ARTICLE_TO_SUMMARIZE # "translate English to German: How old are you?" - inputs = taskmodule.tokenizer(encoder_input_str, max_length=1024, return_tensors="pt") - - torch.manual_seed(42) - outputs = model.generate( - inputs["input_ids"], - num_beams=3, - min_length=5, - max_length=20, - return_dict_in_generate=True, - output_scores=True, - ) - assert isinstance(outputs, BeamSearchEncoderDecoderOutput) - torch.testing.assert_close(outputs.sequences_scores, torch.tensor([-8.088160514831543])) - torch.testing.assert_close( - outputs.sequences, - torch.tensor( - [[0, 10, 30, 53, 54, 45, 15, 16, 17, 33, 33, 33, 35, 33, 58, 39, 41, 35, 33, 35]] - ), - ) - - # result = tokenizer.batch_decode( - # summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False - # ) - # assert result == [" power lines in California have been shut down on Friday."] diff --git a/tests/models/base_models/test_bart_with_decoder_position_ids.py b/tests/models/base_models/test_bart_with_decoder_position_ids.py deleted file mode 100644 index 0b814c64b..000000000 --- 
a/tests/models/base_models/test_bart_with_decoder_position_ids.py +++ /dev/null @@ -1,301 +0,0 @@ -import pytest -import torch -from torch.nn import Embedding -from transformers import BartConfig -from transformers.modeling_outputs import BaseModelOutputWithPastAndCrossAttentions -from transformers.models.bart.modeling_bart import BartEncoder - -from pie_modules.models.base_models import BartModelWithDecoderPositionIds -from pie_modules.models.base_models.bart_with_decoder_position_ids import ( - BartDecoderWithPositionIds, - BartLearnedPositionalEmbeddingWithPositionIds, -) - - -def test_bart_learned_positional_embedding_with_position_ids(): - # Arrange - torch.manual_seed(42) - model = BartLearnedPositionalEmbeddingWithPositionIds(10, 6) - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]) - position_ids_original = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]) - position_ids_different = torch.tensor([[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]]) - - # Act - original = model(input_ids=input_ids) - replaced_original = model(input_ids=input_ids, position_ids=position_ids_original) - replaced_different = model(input_ids=input_ids, position_ids=position_ids_different) - - # Assert - assert original.shape == (1, 10, 6) - assert replaced_original.shape == (1, 10, 6) - torch.testing.assert_close(original, replaced_original) - assert replaced_different.shape == (1, 10, 6) - assert not torch.allclose(original, replaced_different) - - -@pytest.fixture(scope="module") -def bart_config(): - return BartConfig( - vocab_size=30, - d_model=10, - encoder_layers=1, - decoder_layers=1, - encoder_attention_heads=2, - decoder_attention_heads=2, - encoder_ffn_dim=20, - decoder_ffn_dim=20, - max_position_embeddings=10, - ) - - -@pytest.fixture(scope="module") -def bart_decoder_with_position_ids(bart_config): - return BartDecoderWithPositionIds(config=bart_config) - - -def test_bart_decoder_with_position_ids(bart_decoder_with_position_ids): - assert bart_decoder_with_position_ids is not None 
- - -def test_bart_decoder_with_position_ids_get_input_embeddings(bart_decoder_with_position_ids): - input_embeddings = bart_decoder_with_position_ids.get_input_embeddings() - assert input_embeddings is not None - assert isinstance(input_embeddings, Embedding) - assert input_embeddings.embedding_dim == 10 - assert input_embeddings.num_embeddings == 30 - - -def test_bart_decoder_with_position_ids_set_input_embeddings(bart_decoder_with_position_ids): - original_input_embeddings = bart_decoder_with_position_ids.get_input_embeddings() - torch.manual_seed(42) - new_input_embeddings = Embedding( - original_input_embeddings.num_embeddings, original_input_embeddings.embedding_dim - ) - bart_decoder_with_position_ids.set_input_embeddings(new_input_embeddings) - input_embeddings = bart_decoder_with_position_ids.get_input_embeddings() - assert input_embeddings == new_input_embeddings - assert input_embeddings is not original_input_embeddings - # recover original input embeddings - bart_decoder_with_position_ids.set_input_embeddings(original_input_embeddings) - - -def test_bart_decoder_with_position_ids_forward(bart_decoder_with_position_ids): - # Arrange - model = bart_decoder_with_position_ids - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]) - position_ids_original = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]) - position_ids_different = torch.tensor([[0, 0, 0, 1, 1, 1, 2, 2]]) - - # Act - torch.manual_seed(42) - original = model(input_ids=input_ids) - torch.manual_seed(42) - replaced_original = model(input_ids=input_ids, position_ids=position_ids_original) - torch.manual_seed(42) - replaced_different = model(input_ids=input_ids, position_ids=position_ids_different) - - # Assert - assert isinstance(original, BaseModelOutputWithPastAndCrossAttentions) - assert original.last_hidden_state.shape == (1, 8, 10) - assert isinstance(replaced_original, BaseModelOutputWithPastAndCrossAttentions) - torch.testing.assert_close(original.last_hidden_state, 
replaced_original.last_hidden_state) - - assert isinstance(replaced_different, BaseModelOutputWithPastAndCrossAttentions) - assert replaced_different.last_hidden_state.shape == (1, 8, 10) - assert not torch.allclose(original.last_hidden_state, replaced_different.last_hidden_state) - - -def test_bart_decoder_with_position_ids_forward_with_inputs_embeds(bart_decoder_with_position_ids): - # Arrange - model = bart_decoder_with_position_ids - inputs_embeds = torch.randn(1, 8, 10) - position_ids_original = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]) - position_ids_different = torch.tensor([[0, 0, 0, 1, 1, 1, 2, 2]]) - - # Act - torch.manual_seed(42) - original = model(inputs_embeds=inputs_embeds) - torch.manual_seed(42) - replaced_original = model(inputs_embeds=inputs_embeds, position_ids=position_ids_original) - torch.manual_seed(42) - replaced_different = model(inputs_embeds=inputs_embeds, position_ids=position_ids_different) - - # Assert - assert isinstance(original, BaseModelOutputWithPastAndCrossAttentions) - assert original.last_hidden_state.shape == (1, 8, 10) - assert isinstance(replaced_original, BaseModelOutputWithPastAndCrossAttentions) - torch.testing.assert_close(original.last_hidden_state, replaced_original.last_hidden_state) - - assert isinstance(replaced_different, BaseModelOutputWithPastAndCrossAttentions) - assert replaced_different.last_hidden_state.shape == (1, 8, 10) - assert not torch.allclose(original.last_hidden_state, replaced_different.last_hidden_state) - - -def test_bart_decoder_with_position_ids_forward_wrong_position_ids_shape( - bart_decoder_with_position_ids, -): - # Arrange - model = bart_decoder_with_position_ids - input_ids = torch.tensor([[0, 1, 2, 3]]) - position_ids_wrong_shape = torch.tensor([[0, 1, 2]]) - - # Act - torch.manual_seed(42) - with pytest.raises(ValueError) as excinfo: - model(input_ids=input_ids, position_ids=position_ids_wrong_shape) - assert ( - str(excinfo.value) - == "Position IDs shape torch.Size([1, 3]) does not 
match input ids shape torch.Size([1, 4])." - ) - - -@pytest.fixture(scope="module") -def bart_model_with_decoder_position_ids(bart_config): - torch.manual_seed(42) - model = BartModelWithDecoderPositionIds(config=bart_config) - model.train() - return model - - -def test_bart_model_with_decoder_position_ids(bart_model_with_decoder_position_ids): - assert bart_model_with_decoder_position_ids is not None - - -def test_bart_model_with_decoder_position_ids_get_input_embeddings( - bart_model_with_decoder_position_ids, -): - input_embeddings = bart_model_with_decoder_position_ids.get_input_embeddings() - assert input_embeddings is not None - assert isinstance(input_embeddings, Embedding) - assert input_embeddings.embedding_dim == 10 - assert input_embeddings.num_embeddings == 30 - - -def test_bart_model_with_decoder_position_ids_set_input_embeddings( - bart_model_with_decoder_position_ids, -): - original_input_embeddings = bart_model_with_decoder_position_ids.get_input_embeddings() - torch.manual_seed(42) - new_input_embeddings = Embedding( - original_input_embeddings.num_embeddings, original_input_embeddings.embedding_dim - ) - bart_model_with_decoder_position_ids.set_input_embeddings(new_input_embeddings) - input_embeddings = bart_model_with_decoder_position_ids.get_input_embeddings() - assert input_embeddings == new_input_embeddings - assert input_embeddings is not original_input_embeddings - # recover original input embeddings - bart_model_with_decoder_position_ids.set_input_embeddings(original_input_embeddings) - - -def test_bart_model_with_decoder_position_ids_get_encoder(bart_model_with_decoder_position_ids): - encoder = bart_model_with_decoder_position_ids.get_encoder() - assert encoder is not None - assert isinstance(encoder, BartEncoder) - - -def test_bart_model_with_decoder_position_ids_get_decoder(bart_model_with_decoder_position_ids): - decoder = bart_model_with_decoder_position_ids.get_decoder() - assert decoder is not None - assert isinstance(decoder, 
BartDecoderWithPositionIds) - - -@pytest.mark.parametrize( - "return_dict, prepare_encoder_outputs, output_everything", - [(True, True, True), (False, False, False)], -) -def test_bart_model_with_decoder_position_forward( - bart_model_with_decoder_position_ids, return_dict, prepare_encoder_outputs, output_everything -): - model = bart_model_with_decoder_position_ids - - # Arrange - model.eval() - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]) - position_ids_original = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]) - position_ids_different = torch.tensor([[0, 0, 0, 1, 1, 1, 2, 2]]) - common_kwargs = {"input_ids": input_ids, "return_dict": return_dict} - if prepare_encoder_outputs: - common_kwargs["encoder_outputs"] = bart_model_with_decoder_position_ids.get_encoder()( - input_ids=input_ids, return_dict=False - ) - else: - common_kwargs["encoder_outputs"] = None - if output_everything: - common_kwargs["output_attentions"] = True - common_kwargs["output_hidden_states"] = True - - # Act - original = model(**common_kwargs)[0] - replaced_original = model( - decoder_position_ids=position_ids_original, - **common_kwargs, - )[0] - replaced_different = model(decoder_position_ids=position_ids_different, **common_kwargs)[0] - - # Assert - assert isinstance(original, torch.FloatTensor) - assert original.shape == (1, 8, 10) - torch.testing.assert_close( - original[0, :5, :3], - torch.tensor( - [ - [0.7589594721794128, 1.0452316999435425, 0.7063764333724976], - [-0.12192550301551819, -0.9932114481925964, -0.722382664680481], - [0.24711951613426208, -0.291597843170166, -1.0466505289077759], - [1.1228691339492798, -0.0873560905456543, 1.534016728401184], - [-1.1132177114486694, 0.2277398556470871, 1.6456809043884277], - ] - ), - ) - torch.testing.assert_close( - original.sum(dim=-1), - torch.tensor( - [ - [ - 0.0, - -1.1920928955078125e-07, - -1.1920928955078125e-07, - -2.682209014892578e-07, - 5.960464477539063e-08, - 5.960464477539063e-08, - 2.384185791015625e-07, - 
-5.960464477539063e-08, - ] - ] - ), - ) - assert isinstance(replaced_original, torch.FloatTensor) - torch.testing.assert_close(original, replaced_original) - - assert isinstance(replaced_different, torch.FloatTensor) - assert replaced_different.shape == (1, 8, 10) - torch.testing.assert_close( - replaced_different[0, :5, :3], - torch.tensor( - [ - [0.7589594721794128, 1.0452316999435425, 0.7063764333724976], - [-0.0127173513174057, -0.8127143383026123, -1.256797194480896], - [1.0517312288284302, 0.037927787750959396, -0.28661563992500305], - [0.5884698629379272, 0.9930593371391296, 1.3842554092407227], - [0.6132885813713074, -1.0105736255645752, 2.361264228820801], - ] - ), - ) - torch.testing.assert_close( - replaced_different.sum(dim=-1), - torch.tensor( - [ - [ - 0.0, - -2.384185791015625e-07, - -1.7881393432617188e-07, - 2.5331974029541016e-07, - 1.4901161193847656e-07, - 1.1920928955078125e-07, - -1.1920928955078125e-07, - -1.7881393432617188e-07, - ] - ] - ), - ) - assert not torch.allclose(replaced_different, original) diff --git a/tests/models/components/__init__.py b/tests/models/components/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/models/components/test_pointer_head.py b/tests/models/components/test_pointer_head.py deleted file mode 100644 index eb2650f68..000000000 --- a/tests/models/components/test_pointer_head.py +++ /dev/null @@ -1,713 +0,0 @@ -import pytest -import torch -from torch import nn - -from pie_modules.models.components.pointer_head import PointerHead - - -def get_pointer_head(num_embeddings=120, embedding_dim=3, eos_id=1, pad_id=2, **kwargs): - torch.manual_seed(42) - return PointerHead( - embeddings=nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim), - # bos, eos, pad, 3 x label ids - target_token_ids=[100, 101, 102, 110, 111, 112], - bos_id=0, # -> 100 - eos_id=eos_id, # 1 (default) -> 101 - pad_id=pad_id, # 2 (default) -> 102 - embedding_weight_mapping={ - "110": [20, 21], 
- "111": [30], - }, - use_encoder_mlp=True, - use_constraints_encoder_mlp=True, - **kwargs, - ) - - -def test_get_pointer_head(): - pointer_head = get_pointer_head() - assert pointer_head is not None - assert not pointer_head.use_prepared_position_ids - - -def test_set_embeddings(): - pointer_head = get_pointer_head() - original_embeddings = pointer_head.embeddings - new_embeddings = nn.Embedding( - original_embeddings.num_embeddings, original_embeddings.embedding_dim - ) - pointer_head.set_embeddings(new_embeddings) - assert pointer_head.embeddings is not None - assert pointer_head.embeddings != original_embeddings - assert pointer_head.embeddings == new_embeddings - - -def test_overwrite_embeddings_with_mapping(): - pointer_head = get_pointer_head() - original_embeddings_weight = pointer_head.embeddings.weight.clone() - pointer_head.overwrite_embeddings_with_mapping() - assert pointer_head.embeddings is not None - assert not torch.equal(pointer_head.embeddings.weight, original_embeddings_weight) - torch.testing.assert_close( - pointer_head.embeddings.weight[110], original_embeddings_weight[[20, 21]].mean(dim=0) - ) - torch.testing.assert_close( - pointer_head.embeddings.weight[111], original_embeddings_weight[[30]].mean(dim=0) - ) - - -@pytest.mark.parametrize( - "use_attention_mask", - [True, False], -) -def test_prepare_decoder_input_ids(use_attention_mask): - pointer_head = get_pointer_head() - encoder_input_ids = torch.tensor( - [ - [10, 11, 12, 13, 14, 15], - [20, 21, 22, 23, 24, 0], - ] - ).to(torch.long) - # we have 3 special tokens (bos, eos, pad) and 3 labels, so the offset is 6 - input_ids = torch.tensor( - [ - # bos, offset (0=6-6), offset (1=7-6), label (3), label (4), offset (2=8-6) - [0, 6, 7, 3, 4, 8], - # bos, label (3), offset (3=9-6), eos, pad, pad - [0, 3, 9, 1, 2, 2], - ] - ).to(torch.long) - # this is the attention mask for the (decoder) input_ids, not the encoder_input_ids - attention_mask = ( - torch.tensor( - [ - [1, 1, 1, 1, 1, 1], - [1, 
1, 1, 1, 0, 0], - ] - ).to(torch.long) - if use_attention_mask - else None - ) - - prepared_decoder_input_ids = pointer_head.prepare_decoder_input_ids( - input_ids=input_ids, - encoder_input_ids=encoder_input_ids, - ) - assert prepared_decoder_input_ids is not None - assert prepared_decoder_input_ids.shape == input_ids.shape - # to recap, the target2token_id mapping is (bos, eos, pad, 3 x label ids) - torch.testing.assert_close( - pointer_head.target2token_id, torch.tensor([100, 101, 102, 110, 111, 112]) - ) - # 3 labels + bos / pad - assert pointer_head.pointer_offset == 6 - assert prepared_decoder_input_ids.tolist() == [ - # bos (0), offset (0=6-6), offset (1=7-6), label (3), label (4), offset (2=8-6) - [100, 10, 11, 110, 111, 12], - # bos (0), label (3), offset (3=9-6), eos (1), pad (2), pad (2) - [100, 110, 23, 101, 102, 102], - ] - - -def test_prepare_decoder_input_ids_out_of_bounds(): - pointer_head = get_pointer_head() - # 3 labels + bos / pad - assert pointer_head.pointer_offset == 6 - encoder_input_ids = torch.tensor( - [ - [100, 101, 102], - ] - ).to(torch.long) - input_ids = torch.tensor( - [ - # 9 is out of bounds: > pointer_head.pointer_offset + len(encoder_input_ids) - [0, 9], - ] - ).to(torch.long) - - with pytest.raises(ValueError) as excinfo: - pointer_head.prepare_decoder_input_ids( - input_ids=input_ids, encoder_input_ids=encoder_input_ids - ) - assert str(excinfo.value) == ( - "encoder_input_ids_index.max() [3] must be smaller than encoder_input_length [3]!" 
- ) - - -@pytest.mark.parametrize( - "decoder_position_id_mode", - ["pattern", "pattern_with_increment", "mapping"], -) -def test_prepare_decoder_position_ids(decoder_position_id_mode): - pointer_head = get_pointer_head( - decoder_position_id_mode=decoder_position_id_mode, - decoder_position_id_pattern=[0, 1, 1, 2], - decoder_position_id_mapping={"default": 3, "vocab": 2, "bos": 0, "eos": 0, "pad": 1}, - ) - input_ids = torch.tensor( - [ - # bos, offset (0=6-6), offset (1=7-6), label (3), label (4), offset (2=8-6) - [0, 6, 7, 3, 4, 8], - # bos, label (3), offset (3=9-6), eos, pad, pad - [0, 3, 9, 1, 2, 2], - ] - ).to(torch.long) - - prepared_decoder_position_ids = pointer_head.prepare_decoder_position_ids(input_ids=input_ids) - assert prepared_decoder_position_ids is not None - assert prepared_decoder_position_ids.shape == input_ids.shape - if decoder_position_id_mode == "pattern": - assert prepared_decoder_position_ids.tolist() == [ - [0, 2, 3, 3, 4, 2], - [0, 2, 3, 3, 1, 1], - ] - elif decoder_position_id_mode == "pattern_with_increment": - # the position ids (except for position-bos=0 and position-pad=1) get increased by 3 per record - # (which has length 4) - assert prepared_decoder_position_ids.tolist() == [ - [0, 2, 3, 3, 4, 5], - [0, 2, 3, 3, 1, 1], - ] - elif decoder_position_id_mode == "mapping": - assert prepared_decoder_position_ids.tolist() == [ - [0, 3, 3, 2, 2, 3], - [0, 2, 3, 0, 1, 1], - ] - else: - raise ValueError(f"unknown decoder_position_id_mode={decoder_position_id_mode}") - - -def test_prepare_decoder_position_ids_unknown_mode(): - with pytest.raises(ValueError) as excinfo: - get_pointer_head(decoder_position_id_mode="unknown") - assert str(excinfo.value) == ( - 'decoder_position_id_mode="unknown" is not supported, use one of "pattern", ' - '"pattern_with_increment", or "mapping"!' 
- ) - - -@pytest.mark.parametrize( - "decoder_position_id_mode", - ["pattern", "pattern_with_increment", "mapping"], -) -def test_prepare_decoder_position_ids_missing_parameter(decoder_position_id_mode): - with pytest.raises(ValueError) as excinfo: - get_pointer_head(decoder_position_id_mode=decoder_position_id_mode) - if decoder_position_id_mode in ["pattern", "pattern_with_increment"]: - assert ( - str(excinfo.value) == "decoder_position_id_pattern must be provided when using " - 'decoder_position_id_mode="pattern" or "pattern_with_increment"!' - ) - elif decoder_position_id_mode == "mapping": - assert ( - str(excinfo.value) - == 'decoder_position_id_mode="mapping" requires decoder_position_id_mapping to be provided!' - ) - else: - raise ValueError(f"unknown decoder_position_id_mode={decoder_position_id_mode}") - - -def test_prepare_decoder_position_ids_with_wrong_mapping(): - input_ids = torch.tensor( - [ - # bos, offset (0=6-6), offset (1=7-6), label (3), label (4), offset (2=8-6) - [0, 6, 7, 3, 4, 8], - # bos, label (3), offset (3=9-6), eos, pad, pad - [0, 3, 9, 1, 2, 2], - ] - ).to(torch.long) - - # missing default - pointer_head = get_pointer_head( - decoder_position_id_mode="mapping", - decoder_position_id_mapping={"vocab": 2, "bos": 0, "eos": 0, "pad": 1}, - ) - with pytest.raises(ValueError) as excinfo: - pointer_head.prepare_decoder_position_ids(input_ids=input_ids) - assert ( - str(excinfo.value) - == "mapping must contain a default entry, but only contains ['vocab', 'bos', 'eos', 'pad']!" 
- ) - - # unknown key - pointer_head = get_pointer_head( - decoder_position_id_mode="mapping", - decoder_position_id_mapping={ - "default": 3, - "vocab": 2, - "bos": 0, - "eos": 0, - "pad": 1, - "unknown": 4, - }, - ) - with pytest.raises(ValueError) as excinfo: - pointer_head.prepare_decoder_position_ids(input_ids=input_ids) - assert ( - str(excinfo.value) == "Mapping contains unknown key 'unknown' " - "(mapping: {'default': 3, 'vocab': 2, 'bos': 0, 'eos': 0, 'pad': 1, 'unknown': 4})." - ) - - # multiple values for same input id - pointer_head = get_pointer_head( - # same id for eos and pad - eos_id=1, - pad_id=1, - decoder_position_id_mode="mapping", - decoder_position_id_mapping={ - "default": 3, - "vocab": 2, - "bos": 0, - # different position ids for eos and pad, this is not allowed when eos and pad have the same id - "eos": 0, - "pad": 1, - }, - ) - with pytest.raises(ValueError) as excinfo: - pointer_head.prepare_decoder_position_ids(input_ids=input_ids) - assert ( - str(excinfo.value) - == "Can not set the position ids for 'pad' to 1 because it was already set to 0 by key 'eos'. " - "Note that both, 'pad' and 'eos', have the same id (1), so their position_ids need to be " - "also the same (position id mapping: {'default': 3, 'vocab': 2, 'bos': 0, 'eos': 0, 'pad': 1})." 
- ) - - -def test_prepare_decoder_inputs(): - pointer_head = get_pointer_head( - decoder_position_id_mode="pattern", decoder_position_id_pattern=[0, 1, 1, 2] - ) - encoder_input_ids = torch.tensor( - [ - [10, 11, 12, 13, 14, 15], - [20, 21, 22, 23, 24, 0], - ] - ).to(torch.long) - input_ids = torch.tensor( - [ - # bos, offset (0=6-6), offset (1=7-6), label (3), label (4), offset (2=8-6) - [0, 6, 7, 3, 4, 8], - # bos, label (3), offset (3=9-6), eos, pad, pad - [0, 3, 9, 1, 2, 2], - ] - ).to(torch.long) - - decoder_inputs = pointer_head.prepare_decoder_inputs( - input_ids=input_ids, - encoder_input_ids=encoder_input_ids, - ) - assert set(decoder_inputs.keys()) == {"input_ids", "position_ids"} - assert decoder_inputs["input_ids"].shape == input_ids.shape - assert decoder_inputs["position_ids"].shape == input_ids.shape - # to recap, the target2token_id mapping is (bos, eos, pad, 3 x label ids) - torch.testing.assert_close( - pointer_head.target2token_id, torch.tensor([100, 101, 102, 110, 111, 112]) - ) - # 3 labels + bos / pad - assert pointer_head.pointer_offset == 6 - assert decoder_inputs["input_ids"].tolist() == [ - # bos (0), offset (0=6-6), offset (1=7-6), label (3), label (4), offset (2=8-6) - [100, 10, 11, 110, 111, 12], - # bos (0), label (3), offset (3=9-6), eos (1), pad (2), pad (2) - [100, 110, 23, 101, 102, 102], - ] - assert decoder_inputs["position_ids"].tolist() == [ - [0, 2, 3, 3, 4, 2], - [0, 2, 3, 3, 1, 1], - ] - - -def test_forward(): - pointer_head = get_pointer_head() - # shape: (batch_size=2, input_sequence_length=5) - encoder_input_ids = torch.tensor( - [ - [10, 11, 12, 13, 14], - [20, 21, 22, 23, 0], - ] - ).to(torch.long) - encoder_attention_mask = torch.tensor( - [ - [1, 1, 1, 1, 1], - [1, 1, 1, 1, 0], - ] - ).to(torch.long) - # shape: (batch_size=2, input_sequence_length=5, hidden_size=3) - encoder_last_hidden_state = pointer_head.embeddings(encoder_input_ids) - # shape: (batch_size=2, target_sequence_length=4) - prepared_input_ids = 
torch.tensor( - [ - # bos (0), offset (0=6-6), offset (1=7-6), label (3) - [100, 10, 11, 110], - # bos (0), label (3), offset (3=9-6), eos (1) - [100, 110, 23, 101], - ] - ).to(torch.long) - # shape: (batch_size=2, target_sequence_length=4) - last_hidden_state = pointer_head.embeddings(prepared_input_ids) - - torch.manual_seed(42) - logits, loss = pointer_head( - encoder_input_ids=encoder_input_ids, - encoder_attention_mask=encoder_attention_mask, - encoder_last_hidden_state=encoder_last_hidden_state, - last_hidden_state=last_hidden_state, - ) - assert loss is None - assert logits is not None - # shape: (batch_size=2, target_sequence_length=4, num_targets+num_offsets=6+5==11) - assert logits.shape == (2, 4, 11) - torch.testing.assert_close( - logits, - torch.tensor( - [ - [ - [ - -1.0000000138484279e24, - -0.9407045245170593, - -1.0000000138484279e24, - 0.5535521507263184, - 0.04295700043439865, - 1.0467679500579834, - -1.110795497894287, - 1.1652655601501465, - 0.09444020688533783, - 0.43052661418914795, - -1.0437036752700806, - ], - [ - -1.0000000138484279e24, - 1.1563994884490967, - -1.0000000138484279e24, - -0.8941665887832642, - -0.6862093806266785, - -1.154745101928711, - 1.6984729766845703, - -1.3889904022216797, - -0.4076152741909027, - -1.0112841129302979, - 0.9846026301383972, - ], - [ - -1.0000000138484279e24, - -1.9377808570861816, - -1.0000000138484279e24, - 2.437451124191284, - 0.041493892669677734, - 0.5383729338645935, - -1.5238577127456665, - 1.6700562238693237, - -0.07231226563453674, - 1.0911093950271606, - -0.9189060926437378, - ], - [ - -1.0000000138484279e24, - -1.880744218826294, - -1.0000000138484279e24, - 3.8719429969787598, - 0.07287894189357758, - -1.3378281593322754, - -0.653921365737915, - 0.783344566822052, - -0.3344290256500244, - 1.3571363687515259, - 0.5505899786949158, - ], - ], - [ - [ - -1.0000000138484279e24, - -0.9407045245170593, - -1.0000000138484279e24, - 0.5535521507263184, - 0.04295700043439865, - 1.0467679500579834, - 
-1.0019789934158325, - 0.6891120672225952, - -0.002076566219329834, - 0.7561025619506836, - -1.0000000331813535e32, - ], - [ - -1.0000000138484279e24, - -1.880744218826294, - -1.0000000138484279e24, - 3.8719429969787598, - 0.07287894189357758, - -1.3378281593322754, - -1.3875324726104736, - -2.124865770339966, - -2.559859275817871, - 0.5425653457641602, - -1.0000000331813535e32, - ], - [ - -1.0000000138484279e24, - -1.479057788848877, - -1.0000000138484279e24, - 1.7857770919799805, - 0.6723557114601135, - 0.6378745436668396, - -2.262815475463867, - -0.1536862850189209, - -0.5338708758354187, - 1.3628911972045898, - -1.0000000331813535e32, - ], - [ - -1.0000000138484279e24, - 1.1815755367279053, - -1.0000000138484279e24, - -1.880744218826294, - -0.10646091401576996, - 0.1437276005744934, - 1.0795626640319824, - 0.6434042453765869, - 1.0681594610214233, - -0.5814396142959595, - -1.0000000331813535e32, - ], - ], - ] - ), - ) - - -@pytest.mark.parametrize( - "with_constraints", - [True, False], -) -def test_forward_with_labels(with_constraints): - pointer_head = get_pointer_head(num_embeddings=300, embedding_dim=3) - - # shape: (batch_size=2, input_sequence_length=5) - encoder_input_ids = torch.tensor( - [ - [10, 11, 12, 13, 14], - [20, 21, 22, 0, 0], - ] - ).to(torch.long) - encoder_attention_mask = torch.tensor( - [ - [1, 1, 1, 1, 1], - [1, 1, 1, 0, 0], - ] - ).to(torch.long) - # shape: (batch_size=2, input_sequence_length=5, hidden_size=3) - # encoder_last_hidden_state = pointer_head.embeddings(encoder_input_ids) - # shape: (batch_size=2, target_sequence_length=4) - prepared_input_ids = torch.tensor( - [ - # bos (0), offset (0=6-6), offset (1=7-6), label (3) - [100, 10, 11, 110], - # bos (0), label (3), offset (3=9-6), eos (1) - [100, 110, 23, 101], - ] - ).to(torch.long) - # shape: (batch_size=2, target_sequence_length=4) - # last_hidden_state = pointer_head.embeddings(prepared_input_ids) - labels = torch.tensor( - [ - # offset (0=6-6), offset (1=7-6), label (3), 
label (4) - [6, 7, 3, 4], - # label (3), offset (3=9-6), eos, pad, pad - [3, 9, 1, 2], - ] - ).to(torch.long) - decoder_attention_mask = torch.tensor( - [ - [1, 1, 1, 1], - [1, 1, 1, 0], - ] - ).to(torch.long) - - # shape: (batch_size=2, target_sequence_length=4, num_targets+num_offsets=6+5==11) - constraints = ( - # recap: the target2token_id mapping is (bos, eos, pad, 3 x label ids) - torch.tensor( - [ - [ - # allow all labels - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0], - # allow all offsets different from previous label id (3) - [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], - # allow all offsets different from previous label ids (3, 4) - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], - # allow all offsets - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], - ], - [ - # allow all labels - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0], - # allow all offsets - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], - # allow all offsets equal or bigger than previous one (9) or eos - [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1], - # allow only pad - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], - ], - ] - ).to(torch.long) - ) - - torch.manual_seed(42) - # shape: (batch_size=2, input_sequence_length=6, hidden_size=3) - encoder_last_hidden_state = pointer_head.embeddings(encoder_input_ids) - last_hidden_state = pointer_head.embeddings(prepared_input_ids) - _, loss = pointer_head( - encoder_input_ids=encoder_input_ids, - encoder_attention_mask=encoder_attention_mask, - encoder_last_hidden_state=encoder_last_hidden_state, - last_hidden_state=last_hidden_state, - labels=labels, - decoder_attention_mask=decoder_attention_mask, - constraints=constraints if with_constraints else None, - ) - assert loss is not None - maybe_gradients = torch.autograd.grad(loss, pointer_head.parameters(), allow_unused=True) - gradients = [g for g in maybe_gradients if g is not None] - if not with_constraints: - # embeddings.weight, 2 x (encoder_mlp.weight, encoder_mlp.bias) - assert len(gradients) == 5 - # embeddings.weight (just check entries for special tokens and labels) - 
torch.testing.assert_close( - gradients[0][100:113], - torch.tensor( - [ - [0.29642319679260254, 0.012336060404777527, 0.14099650084972382], - [0.015981415286660194, 0.17855659127235413, -0.21089009940624237], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [-0.8812153935432434, -0.43322375416755676, 0.07359108328819275], - [0.22255337238311768, 0.09604272246360779, 0.017692387104034424], - [-0.021408570930361748, -0.01747075282037258, 0.15882402658462524], - ] - ), - ) - # first encoder_mlp.weight - torch.testing.assert_close( - gradients[1], - torch.tensor( - [ - [6.044770998414606e-05, -0.001140016596764326, 0.0007320810691453516], - [0.014351745136082172, 0.01521987747400999, -0.028653975576162338], - [0.011420723050832748, 0.0070406426675617695, -0.030101824551820755], - ] - ), - ) - # first encoder_mlp.bias - torch.testing.assert_close( - gradients[2], - torch.tensor([-0.0006180311902426183, -0.023118967190384865, -0.024205176159739494]), - ) - # second encoder_mlp.weight - torch.testing.assert_close( - gradients[3], - torch.tensor( - [ - [-0.0005463349516503513, -0.016356423497200012, 0.01958528161048889], - [-0.0005303063080646098, -0.029644077643752098, -0.1391362100839615], - [0.0028533015865832567, 0.08096987009048462, 0.28279614448547363], - ] - ), - ) - # second encoder_mlp.bias - torch.testing.assert_close( - gradients[4], - torch.tensor([-0.030467912554740906, -0.045307278633117676, 0.06145985424518585]), - ) - else: - # embeddings.weight, 2 x (encoder_mlp.weight, encoder_mlp.bias), 2 x (constraints_encoder_mlp.weight, constraints_encoder_mlp.bias) - assert len(gradients) == 9 - # embeddings.weight (just check entries for special tokens and labels) - torch.testing.assert_close( - gradients[0][100:113], - torch.tensor( - [ - [0.2915953993797302, 0.009700030088424683, 0.1484404355287552], - [0.02216985821723938, 0.15251068770885468, 
-0.21624334156513214], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [-0.8804605007171631, -0.4300656318664551, 0.0664108395576477], - [0.21543428301811218, 0.093157559633255, 0.013825103640556335], - [-0.021408570930361748, -0.01747075282037258, 0.15882402658462524], - ] - ), - ) - # first encoder_mlp.weight - torch.testing.assert_close( - gradients[1], - torch.tensor( - [ - [-0.0003244421095587313, 0.006118832156062126, -0.003929311875253916], - [0.013681752607226372, 0.013532182201743126, -0.027564184740185738], - [0.012365758419036865, 0.00791379064321518, -0.02969365194439888], - ] - ), - ) - # first encoder_mlp.bias - torch.testing.assert_close( - gradients[2], - torch.tensor([0.003317170077934861, -0.021803036332130432, -0.023893579840660095]), - ) - # second encoder_mlp.weight - torch.testing.assert_close( - gradients[3], - torch.tensor( - [ - [-0.004014550242573023, -0.018573174253106117, 0.019694898277521133], - [-0.0019358742283657193, -0.030542463064193726, -0.13909178972244263], - [0.0009692738531157374, 0.0797656774520874, 0.28285568952560425], - ] - ), - ) - # second encoder_mlp.bias - torch.testing.assert_close( - gradients[4], - torch.tensor([-0.046919066458940506, -0.05197446048259735, 0.05252313241362572]), - ) - # first constraints_encoder_mlp.weight - torch.testing.assert_close( - gradients[5], - torch.tensor( - [ - [0.010755524039268494, -0.009512078016996384, -0.007983260788023472], - [0.004236628767102957, 0.002073169220238924, -0.0010695274686440825], - [-0.008700753562152386, -0.00425766222178936, 0.002196485875174403], - ] - ), - ) - # first constraints_encoder_mlp.bias - torch.testing.assert_close( - gradients[6], - torch.tensor([0.05254765599966049, -0.0024578727316111326, 0.005047726910561323]), - ) - # second constraints_encoder_mlp.weight - torch.testing.assert_close( - gradients[7], - torch.tensor( - [ - [0.004190368112176657, 
-0.01078515499830246, -0.015312351286411285], - [0.001505501102656126, -0.006679146084934473, -0.009482797235250473], - [0.02189277485013008, -0.010388202033936977, -0.014748772606253624], - ] - ), - ) - # second constraints_encoder_mlp.bias - torch.testing.assert_close( - gradients[8], - torch.tensor([0.016296036541461945, -0.00018996000289916992, 0.05888192355632782]), - ) diff --git a/tests/models/components/test_pooler.py b/tests/models/components/test_pooler.py index a63b36a68..e69de29bb 100644 --- a/tests/models/components/test_pooler.py +++ b/tests/models/components/test_pooler.py @@ -1,218 +0,0 @@ -import pytest -import torch - -from pie_modules.models.components.pooler import ( - CLS_TOKEN, - MENTION_POOLING, - START_TOKENS, - ArgumentWrappedPooler, - AtIndexPooler, - SpanMaxPooler, - SpanMeanPooler, - get_pooler_and_output_size, - pool_cls, -) - - -@pytest.mark.parametrize( - "pooler_type", - [ - CLS_TOKEN, - START_TOKENS, - MENTION_POOLING, - ], -) -def test_get_pooler_and_output_size(pooler_type): - pooler, output_size = get_pooler_and_output_size(config={"type": pooler_type}, input_dim=20) - assert pooler is not None - if pooler_type == CLS_TOKEN: - assert output_size == 20 - elif pooler_type in (START_TOKENS, MENTION_POOLING): - # pre default, num_indices is 2 - assert output_size == 20 * 2 - else: - raise ValueError(f"Unknown pooler type {pooler_type}") - - -@pytest.mark.parametrize("aggregate", ["max", "mean"]) -def test_get_pooler_and_output_size_mention(aggregate): - pooler, output_size = get_pooler_and_output_size( - config={"type": MENTION_POOLING, "aggregate": aggregate}, input_dim=20 - ) - assert pooler is not None - assert output_size == 20 * 2 - if aggregate == "max": - assert isinstance(pooler, SpanMaxPooler) - elif aggregate == "mean": - assert isinstance(pooler, SpanMeanPooler) - else: - raise ValueError(f"Unknown aggregate type {aggregate}") - - -def test_get_pooler_and_output_size_mention_unknown_aggregate(): - with 
pytest.raises(ValueError) as excinfo: - get_pooler_and_output_size( - config={"type": MENTION_POOLING, "aggregate": "unknown"}, input_dim=20 - ) - assert str(excinfo.value) == 'Unknown aggregation method for mention pooling: "unknown"' - - -def test_get_pooler_and_output_size_wrong_type(): - with pytest.raises(ValueError) as excinfo: - get_pooler_and_output_size(config={"type": "wrong_type"}, input_dim=20) - assert str(excinfo.value) == 'Unknown pooler type "wrong_type"' - - -@pytest.fixture(scope="session") -def hidde_state(): - result = torch.tensor( - [ - [[0.00, 0.01], [0.10, 0.11], [0.20, 0.21], [0.30, 0.31]], - [[1.00, 1.01], [1.10, 1.11], [1.20, 1.21], [1.30, 1.31]], - ] - ) - # batch_size x sequence_length x hidden_size - assert result.shape == (2, 4, 2) - return result - - -def test_pool_cls(hidde_state): - pooler = pool_cls - output = pooler(hidden_state=hidde_state) - assert output is not None - batch_size = hidde_state.shape[0] - hidden_size = hidde_state.shape[-1] - assert output.shape == (batch_size, hidden_size) - torch.testing.assert_close(output, hidde_state[:, 0, :]) - torch.testing.assert_close(output, torch.tensor([[0.00, 0.01], [1.00, 1.01]])) - - -def test_at_index_pooler(hidde_state): - pooler = AtIndexPooler(input_dim=hidde_state.shape[-1], num_indices=2) - indices = torch.tensor([[2, 0], [1, 0]]) - output = pooler(hidden_state=hidde_state, indices=indices) - assert output is not None - batch_size = hidde_state.shape[0] - hidden_size = hidde_state.shape[-1] - # times num_indices (=2) due to concat - assert output.shape == (batch_size, hidden_size * 2) - torch.testing.assert_close( - output, torch.tensor([[0.20, 0.21, 0.00, 0.01], [1.10, 1.11, 1.00, 1.01]]) - ) - - -def test_at_index_pooler_with_offset(hidde_state): - # set the seed to make sure that we get the same missing embeddings - torch.manual_seed(42) - pooler = AtIndexPooler(input_dim=hidde_state.shape[-1], num_indices=2, offset=-1) - indices = torch.tensor([[2, 1], [0, -10]]) - 
output = pooler(hidden_state=hidde_state, indices=indices) - assert output is not None - batch_size = hidde_state.shape[0] - hidden_size = hidde_state.shape[-1] - # times num_indices (=2) due to concat - assert output.shape == (batch_size, hidden_size * 2) - # the second batch element has out of bounds indices, so we expect the missing embeddings - # it needs to be flattened, because the output is concatenated - torch.testing.assert_close(output[1], pooler.missing_embeddings.view(-1)) - torch.testing.assert_close( - output, - torch.tensor( - [ - [0.10, 0.11, 0.00, 0.01], - [ - 0.33669036626815796, - 0.12880940735340118, - 0.23446236550807953, - 0.23033303022384644, - ], - ] - ), - ) - - -def test_at_index_pooler_wrong_indices_shapes(hidde_state): - pooler = AtIndexPooler(input_dim=hidde_state.shape[-1], num_indices=2) - indices = torch.tensor([[2, 0, 1], [1, 0, 0]]) - with pytest.raises(ValueError) as excinfo: - pooler(hidden_state=hidde_state, indices=indices) - assert str(excinfo.value) == "number of indices [3] has to be the same as num_types [2]" - - -def test_argument_wrapped_pooler(hidde_state): - def dummy_pooler(hidden_state, y): - return hidden_state[:, 0, :] - - pooler = ArgumentWrappedPooler(pooler=dummy_pooler, argument_mapping={"x": "y"}) - output = pooler(hidden_state=hidde_state, x="dummy") - assert output is not None - batch_size = hidde_state.shape[0] - hidden_size = hidde_state.shape[-1] - assert output.shape == (batch_size, hidden_size) - torch.testing.assert_close(output, hidde_state[:, 0, :]) - - -def test_span_max_pooler(hidde_state): - pooler = SpanMaxPooler(input_dim=hidde_state.shape[-1], num_indices=2) - start_indices = torch.tensor([[2, 0], [0, 1]]) - end_indices = torch.tensor([[3, 3], [1, 2]]) - output = pooler(hidden_state=hidde_state, start_indices=start_indices, end_indices=end_indices) - assert output is not None - batch_size = hidde_state.shape[0] - hidden_size = hidde_state.shape[-1] - # times num_indices (=2) due to concat - 
assert output.shape == (batch_size, hidden_size * 2) - torch.testing.assert_close( - output, torch.tensor([[0.20, 0.21, 0.20, 0.21], [1.00, 1.01, 1.10, 1.11]]) - ) - - -def test_span_max_pooler_wrong_start_indices_shape(hidde_state): - pooler = SpanMaxPooler(input_dim=hidde_state.shape[-1], num_indices=2) - start_indices = torch.tensor([[2, 0, 1], [0, 1, 0]]) - end_indices = torch.tensor([[3, 3], [1, 2]]) - with pytest.raises(ValueError) as excinfo: - pooler(hidden_state=hidde_state, start_indices=start_indices, end_indices=end_indices) - assert str(excinfo.value) == ( - "number of start indices [3] has to be the same as num_types [2]" - ) - - -def test_span_max_pooler_wrong_end_indices_shape(hidde_state): - pooler = SpanMaxPooler(input_dim=hidde_state.shape[-1], num_indices=2) - start_indices = torch.tensor([[2, 0], [0, 1]]) - end_indices = torch.tensor([[3, 3, 3], [1, 2, 1]]) - with pytest.raises(ValueError) as excinfo: - pooler(hidden_state=hidde_state, start_indices=start_indices, end_indices=end_indices) - assert str(excinfo.value) == ("number of end indices [3] has to be the same as num_types [2]") - - -def test_span_max_pooler_start_indices_bigger_than_end_indices(hidde_state): - pooler = SpanMaxPooler(input_dim=hidde_state.shape[-1], num_indices=2) - start_indices = torch.tensor([[2, 0], [0, 1]]) - end_indices = torch.tensor([[1, 3], [1, 2]]) - with pytest.raises(ValueError) as excinfo: - pooler(hidden_state=hidde_state, start_indices=start_indices, end_indices=end_indices) - assert str(excinfo.value) == ( - "values in start_indices have to be smaller than respective values in end_indices, but start_indices=\n" - "tensor([[2, 0],\n" - " [0, 1]])\n " - "and end_indices=\n" - "tensor([[1, 3],\n" - " [1, 2]])" - ) - - -def test_span_mean_pooler(hidde_state): - pooler = SpanMeanPooler(input_dim=hidde_state.shape[-1], num_indices=2) - start_indices = torch.tensor([[2, 0], [0, 1]]) - end_indices = torch.tensor([[3, 3], [1, 2]]) - output = 
pooler(hidden_state=hidde_state, start_indices=start_indices, end_indices=end_indices) - assert output is not None - batch_size = hidde_state.shape[0] - hidden_size = hidde_state.shape[-1] - # times num_indices (=2) due to concat - assert output.shape == (batch_size, hidden_size * 2) - torch.testing.assert_close( - output, torch.tensor([[0.20, 0.21, 0.10, 0.11], [1.00, 1.01, 1.10, 1.11]]) - ) diff --git a/tests/models/components/test_seq2seq_encoder.py b/tests/models/components/test_seq2seq_encoder.py deleted file mode 100644 index 58e2fc0c5..000000000 --- a/tests/models/components/test_seq2seq_encoder.py +++ /dev/null @@ -1,93 +0,0 @@ -import pytest -import torch - -from pie_modules.models.components.seq2seq_encoder import ( - ACTIVATION_TYPE2CLASS, - RNN_TYPE2CLASS, - build_seq2seq_encoder, -) - - -def test_no_encoder(): - seq2seq_dict = {} - input_size = 10 - encoder, output_size = build_seq2seq_encoder(seq2seq_dict, input_size) - assert encoder is None - assert output_size == input_size - - seq2seq_dict = { - "type": "sequential", - "rnn_layer": { - "type": "none", - }, - } - input_size = 10 - encoder, output_size = build_seq2seq_encoder(seq2seq_dict, input_size) - assert len(encoder) == 0 - assert output_size == input_size - - -@pytest.mark.parametrize("seq2seq_enc_type", list(RNN_TYPE2CLASS)) -@pytest.mark.parametrize("bidirectional", [True, False]) -def test_rnn_encoder(seq2seq_enc_type, bidirectional): - hidden_size = 99 - seq2seq_dict = { - "type": seq2seq_enc_type, - "hidden_size": hidden_size, - "bidirectional": bidirectional, - } - input_size = 10 - encoder, output_size = build_seq2seq_encoder(seq2seq_dict, input_size) - assert encoder is not None - assert isinstance(encoder.rnn, RNN_TYPE2CLASS[seq2seq_enc_type]) - - expected_output_size = hidden_size * 2 if bidirectional else hidden_size - assert output_size is not None - assert output_size == expected_output_size - - -@pytest.mark.parametrize("activation_type", list(ACTIVATION_TYPE2CLASS)) -def 
test_activations(activation_type): - seq2seq_dict = { - "type": activation_type, - } - input_size = 10 - encoder, output_size = build_seq2seq_encoder(seq2seq_dict, input_size) - assert encoder is not None - assert isinstance(encoder, ACTIVATION_TYPE2CLASS[activation_type]) - assert output_size == input_size - - -def test_dropout(): - seq2seq_dict = { - "type": "dropout", - "p": 0.5, - } - input_size = 10 - encoder, output_size = build_seq2seq_encoder(seq2seq_dict, input_size) - assert encoder is not None - assert isinstance(encoder, torch.nn.Dropout) - assert output_size == input_size - - -def test_linear(): - out_features = 99 - seq2seq_dict = { - "type": "linear", - "out_features": out_features, - } - - input_size = 10 - encoder, output_size = build_seq2seq_encoder(seq2seq_dict, input_size) - assert encoder is not None - assert isinstance(encoder, torch.nn.Linear) - assert output_size == out_features - - -def test_unknown_rnn_type(): - seq2seq_dict = { - "type": "unknown", - } - with pytest.raises(ValueError) as exc_info: - build_seq2seq_encoder(seq2seq_dict, 10) - assert str(exc_info.value) == "Unknown seq2seq_encoder_type: unknown" diff --git a/tests/models/test_extractive_question_answering.py b/tests/models/test_extractive_question_answering.py deleted file mode 100644 index 9d8b6c1cb..000000000 --- a/tests/models/test_extractive_question_answering.py +++ /dev/null @@ -1,245 +0,0 @@ -import json - -import pytest -import torch -import transformers -from pytorch_lightning import Trainer - -from pie_modules.annotations import ExtractiveAnswer, Question -from pie_modules.documents import TextDocumentWithQuestionsAndExtractiveAnswers -from pie_modules.models.simple_extractive_question_answering import ( - SimpleExtractiveQuestionAnsweringModel, -) -from pie_modules.taskmodules.extractive_question_answering import ( - ExtractiveQuestionAnsweringTaskModule, -) -from tests import DUMP_FIXTURE_DATA, FIXTURES_ROOT - -FIXTURES_TASKMODULE_DATA_PATH = FIXTURES_ROOT / 
"taskmodules" / "extractive_question_answering" - - -@pytest.fixture -def documents(): - document0 = TextDocumentWithQuestionsAndExtractiveAnswers( - text="This is a test document", id="doc0" - ) - document0.questions.append(Question(text="What is the first word?")) - document0.answers.append(ExtractiveAnswer(question=document0.questions[0], start=0, end=3)) - - document1 = TextDocumentWithQuestionsAndExtractiveAnswers( - text="Oranges are orange in color.", id="doc1" - ) - document1.questions.append(Question(text="What color are oranges?")) - document1.answers.append(ExtractiveAnswer(question=document1.questions[0], start=23, end=27)) - - document2 = TextDocumentWithQuestionsAndExtractiveAnswers( - text="This is a test document that has two questions attached to it.", id="doc2" - ) - document2.questions.append(Question(text="What type of document is this?")) - document2.questions.append(Question(text="How many questions are attached to this document?")) - document2.answers.append(ExtractiveAnswer(question=document2.questions[0], start=11, end=14)) - document2.answers.append(ExtractiveAnswer(question=document2.questions[1], start=34, end=36)) - - documents = [document0, document1, document2] - return documents - - -@pytest.mark.skipif( - condition=not DUMP_FIXTURE_DATA, - reason="Only need to dump the data if taskmodule has changed", -) -def test_dump_fixtures(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = ExtractiveQuestionAnsweringTaskModule( - tokenizer_name_or_path=tokenizer_name_or_path, - max_length=512, - ) - - task_encodings = taskmodule.encode(documents, encode_target=True) - batch_encoding = taskmodule.collate(task_encodings) - - FIXTURES_TASKMODULE_DATA_PATH.mkdir(parents=True, exist_ok=True) - filepath = FIXTURES_TASKMODULE_DATA_PATH / "batch_encoding_inputs.json" - - inputs = {key: tensor.tolist() for key, tensor in batch_encoding[0].items()} - targets = {key: tensor.tolist() for key, tensor in batch_encoding[1].items()} - 
converted_batch_encoding = { - "inputs": inputs, - "targets": targets, - } - - with open(filepath, "w") as f: - json.dump(converted_batch_encoding, f) - return converted_batch_encoding - - -@pytest.fixture -def batch(): - filepath = FIXTURES_TASKMODULE_DATA_PATH / "batch_encoding_inputs.json" - with open(filepath) as f: - batch_encoding = json.load(f) - - inputs = {key: torch.LongTensor(tensor) for key, tensor in batch_encoding["inputs"].items()} - targets = {key: torch.LongTensor(tensor) for key, tensor in batch_encoding["targets"].items()} - return inputs, targets - - -def get_model( - monkeypatch, - model_type, - batch_size, - seq_len, - add_dummy_linear=False, - **model_kwargs, -): - class MockConfig: - def __init__( - self, - hidden_size: int = 10, - model_type=model_type, - ) -> None: - self.hidden_size = hidden_size - self.model_type = model_type - - class MockModel(torch.nn.Module): - def __init__(self, batch_size, seq_len, hidden_size, add_dummy_linear) -> None: - super().__init__() - self.batch_size = batch_size - self.seq_len = seq_len - self.hidden_size = hidden_size - if add_dummy_linear: - self.dummy_linear = torch.nn.Linear(self.hidden_size, 99) - - def __call__(self, *args, **kwargs): - torch.manual_seed(42) - start_logits = torch.FloatTensor(torch.rand(self.batch_size, self.seq_len)) - end_logits = torch.FloatTensor(torch.rand(self.batch_size, self.seq_len)) - loss = torch.FloatTensor(torch.rand(1)) - return transformers.modeling_outputs.QuestionAnsweringModelOutput( - start_logits=start_logits, - end_logits=end_logits, - loss=loss, - ) - - hidden_size = 10 - - monkeypatch.setattr( - transformers.AutoConfig, - "from_pretrained", - lambda model_name_or_path: MockConfig(hidden_size=hidden_size, model_type=model_type), - ) - monkeypatch.setattr( - transformers.AutoModelForQuestionAnswering, - "from_pretrained", - lambda model_name_or_path, config: MockModel( - batch_size=batch_size, - seq_len=seq_len, - hidden_size=hidden_size, - 
add_dummy_linear=add_dummy_linear, - ), - ) - - # set seed to make the classifier deterministic - torch.manual_seed(42) - result = SimpleExtractiveQuestionAnsweringModel( - model_name_or_path=model_type, - max_input_length=seq_len, - **model_kwargs, - ) - assert not result.is_from_pretrained - - return result - - -@pytest.fixture -def model(monkeypatch, batch): - inputs, targets = batch - model = get_model( - monkeypatch=monkeypatch, - model_type="bert", - batch_size=inputs["input_ids"].shape[0], - seq_len=inputs["input_ids"].shape[1], - add_dummy_linear=True, - ) - return model - - -def test_get_model(monkeypatch, model): - assert model is not None - assert isinstance(model, SimpleExtractiveQuestionAnsweringModel) - - -def test_forward(batch, model): - inputs, targets = batch - batch_size, seq_len = inputs["input_ids"].shape - - # set seed to make sure the output is deterministic - torch.manual_seed(42) - output = model.forward(inputs) - assert set(output) == {"start_logits", "end_logits", "loss"} - start_logits = output["start_logits"] - end_logits = output["end_logits"] - loss = output["loss"] - assert start_logits.shape == (batch_size, seq_len) - assert end_logits.shape == (batch_size, seq_len) - assert loss.shape == (1,) - expected_loss = torch.FloatTensor([0.04587]) - torch.testing.assert_close(output["loss"], expected_loss) - - -def test_step(batch, model): - torch.manual_seed(42) - loss = model.step("train", batch) - assert loss is not None - expected_loss = torch.FloatTensor([0.04587]) - torch.testing.assert_close(loss, expected_loss) - - -def test_training_step(batch, model): - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - expected_loss = torch.FloatTensor([0.04587]) - torch.testing.assert_close(loss, expected_loss) - - -def test_validation_step(batch, model): - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - expected_loss = torch.FloatTensor([0.04587]) - torch.testing.assert_close(loss, 
expected_loss) - - -def test_test_step(batch, model): - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - expected_loss = torch.FloatTensor([0.04587]) - torch.testing.assert_close(loss, expected_loss) - - -def test_optim(model): - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.Adam) - assert optimizer.defaults["lr"] == 1e-05 - - -def test_optim_with_warmup_proportion(monkeypatch, batch): - inputs, targets = batch - model = get_model( - monkeypatch=monkeypatch, - model_type="bert", - batch_size=inputs["input_ids"].shape[0], - seq_len=inputs["input_ids"].shape[1], - add_dummy_linear=True, - warmup_proportion=0.1, - ) - model.trainer = Trainer(max_epochs=10) - optimizers_and_schedulars = model.configure_optimizers() - assert optimizers_and_schedulars is not None - assert isinstance(optimizers_and_schedulars, tuple) and len(optimizers_and_schedulars) == 2 - - optimizers, schedulers = optimizers_and_schedulars - assert isinstance(optimizers[0], torch.optim.Optimizer) - assert set(schedulers[0]) == {"scheduler", "interval"} - schedular = schedulers[0]["scheduler"] - assert isinstance(schedular, torch.optim.lr_scheduler.LRScheduler) diff --git a/tests/models/test_sequence_classification_with_pooler.py b/tests/models/test_sequence_classification_with_pooler.py deleted file mode 100644 index 07f9e10cd..000000000 --- a/tests/models/test_sequence_classification_with_pooler.py +++ /dev/null @@ -1,578 +0,0 @@ -from typing import Dict - -import pytest -import torch -from pytorch_lightning import Trainer -from torch import LongTensor, tensor -from torch.optim.lr_scheduler import LambdaLR -from transformers.modeling_outputs import SequenceClassifierOutput - -from pie_modules.models import SequenceClassificationModelWithPooler -from pie_modules.models.sequence_classification_with_pooler import OutputType -from tests.models import trunc_number - -NUM_CLASSES = 4 -POOLER = "start_tokens" - - 
-@pytest.fixture -def inputs() -> Dict[str, LongTensor]: - result_dict = { - "input_ids": torch.tensor( - [ - [ - 101, - 28998, - 13832, - 3121, - 2340, - 138, - 28996, - 1759, - 1120, - 28999, - 139, - 28997, - 119, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 144, - 28996, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 146, - 119, - 102, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 144, - 28996, - 1759, - 1120, - 145, - 119, - 1262, - 1771, - 28999, - 146, - 28997, - 119, - 102, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 13832, - 3121, - 2340, - 144, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 28998, - 146, - 28996, - 119, - 102, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 150, - 28996, - 1759, - 1120, - 28999, - 151, - 28997, - 119, - 1262, - 1122, - 1771, - 152, - 119, - 102, - ], - [ - 101, - 1752, - 5650, - 119, - 13832, - 3121, - 2340, - 150, - 1759, - 1120, - 151, - 119, - 1262, - 28998, - 1122, - 28996, - 1771, - 28999, - 152, - 28997, - 119, - 102, - ], - [ - 101, - 1752, - 5650, - 119, - 13832, - 3121, - 2340, - 150, - 1759, - 1120, - 151, - 119, - 1262, - 28999, - 1122, - 28997, - 1771, - 28998, - 152, - 28996, - 119, - 102, - ], - ] - ).to(torch.long), - "attention_mask": torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ).to(torch.long), - "pooler_start_indices": torch.tensor( - [[2, 10], [5, 13], [5, 17], 
[17, 11], [5, 13], [14, 18], [18, 14]] - ).to(torch.long), - "pooler_end_indices": torch.tensor( - [[6, 11], [9, 14], [9, 18], [18, 12], [9, 14], [15, 19], [19, 15]] - ).to(torch.long), - } - - return result_dict - - -@pytest.fixture -def targets() -> Dict[str, LongTensor]: - return {"labels": torch.tensor([0, 1, 2, 3, 1, 2, 3]).to(torch.long)} - - -@pytest.fixture -def model() -> SequenceClassificationModelWithPooler: - torch.manual_seed(42) - result = SequenceClassificationModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - pooler=POOLER, - ) - return result - - -def test_model(model): - assert model is not None - named_parameters = dict(model.named_parameters()) - parameter_means = {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "classifier.bias": -0.0253964, - "classifier.weight": -0.000511, - "model.embeddings.LayerNorm.bias": -0.0294608, - "model.embeddings.LayerNorm.weight": 1.312345, - "model.embeddings.position_embeddings.weight": 5.5e-05, - "model.embeddings.token_type_embeddings.weight": -0.0015419, - "model.embeddings.word_embeddings.weight": 0.0031152, - "model.encoder.layer.0.attention.output.LayerNorm.bias": 0.0608714, - "model.encoder.layer.0.attention.output.LayerNorm.weight": 1.199831, - "model.encoder.layer.0.attention.output.dense.bias": 0.0007209, - "model.encoder.layer.0.attention.output.dense.weight": 3.01e-05, - "model.encoder.layer.0.attention.self.key.bias": 0.0020557, - "model.encoder.layer.0.attention.self.key.weight": 0.0003863, - "model.encoder.layer.0.attention.self.query.bias": 0.0185744, - "model.encoder.layer.0.attention.self.query.weight": -0.0003949, - "model.encoder.layer.0.attention.self.value.bias": 0.0065417, - "model.encoder.layer.0.attention.self.value.weight": 4.22e-05, - "model.encoder.layer.0.intermediate.dense.bias": -0.1219958, - "model.encoder.layer.0.intermediate.dense.weight": -0.0011731, - 
"model.encoder.layer.0.output.LayerNorm.bias": 0.005295, - "model.encoder.layer.0.output.LayerNorm.weight": 1.2419648, - "model.encoder.layer.0.output.dense.bias": -0.0013031, - "model.encoder.layer.0.output.dense.weight": -0.0002212, - "model.encoder.layer.1.attention.output.LayerNorm.bias": 0.0443237, - "model.encoder.layer.1.attention.output.LayerNorm.weight": 1.0377343, - "model.encoder.layer.1.attention.output.dense.bias": 0.0041446, - "model.encoder.layer.1.attention.output.dense.weight": -2.43e-05, - "model.encoder.layer.1.attention.self.key.bias": 0.0045062, - "model.encoder.layer.1.attention.self.key.weight": 0.0001333, - "model.encoder.layer.1.attention.self.query.bias": -0.0358397, - "model.encoder.layer.1.attention.self.query.weight": -0.0007321, - "model.encoder.layer.1.attention.self.value.bias": -0.0007094, - "model.encoder.layer.1.attention.self.value.weight": 0.0001012, - "model.encoder.layer.1.intermediate.dense.bias": -0.1247257, - "model.encoder.layer.1.intermediate.dense.weight": -0.001344, - "model.encoder.layer.1.output.LayerNorm.bias": -0.0474442, - "model.encoder.layer.1.output.LayerNorm.weight": 1.017162, - "model.encoder.layer.1.output.dense.bias": 0.000677, - "model.encoder.layer.1.output.dense.weight": -5.32e-05, - "model.pooler.dense.bias": -0.0052078, - "model.pooler.dense.weight": 0.0001295, - "pooler.pooler.missing_embeddings": 0.0630417, - } - assert parameter_means == parameter_means_expected - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -@pytest.fixture -def model_output(model, inputs) -> OutputType: - # set seed to make sure the output is deterministic - torch.manual_seed(42) - return model(inputs) - - -def test_forward_logits(model_output, inputs): - batch_size, seq_len = inputs["input_ids"].shape - - assert isinstance(model_output, SequenceClassifierOutput) - - logits = model_output.logits - - assert logits.shape == (batch_size, NUM_CLASSES) - - torch.testing.assert_close( - logits, - 
torch.tensor( - [ - [ - -0.29492446780204773, - -0.804599940776825, - -0.19659805297851562, - -1.0868580341339111, - ], - [ - -0.3601434826850891, - -0.7454482316970825, - 0.4882846474647522, - -1.0253472328186035, - ], - [ - -0.40172430872917175, - -1.2119399309158325, - 0.5856620669364929, - -1.0999149084091187, - ], - [ - -0.09260234981775284, - -1.0742112398147583, - 0.3299948275089264, - -0.5182554125785828, - ], - [ - -0.40149545669555664, - -0.7731614708900452, - 0.4616103768348694, - -1.0583568811416626, - ], - [ - -0.14356234669685364, - -1.2634986639022827, - 0.31660008430480957, - -0.7487461566925049, - ], - [ - -0.11717557162046432, - -0.971996009349823, - 0.3477852940559387, - -0.5993944406509399, - ], - ] - ), - ) - - -def test_decode(model, model_output, inputs): - decoded = model.decode(inputs=inputs, outputs=model_output) - assert isinstance(decoded, dict) - assert set(decoded) == {"labels", "probabilities"} - labels = decoded["labels"] - assert labels.shape == (inputs["input_ids"].shape[0],) - torch.testing.assert_close( - labels, - torch.tensor([2, 2, 2, 2, 2, 2, 2]), - ) - probabilities = decoded["probabilities"] - assert probabilities.shape == (inputs["input_ids"].shape[0], NUM_CLASSES) - torch.testing.assert_close( - probabilities.round(decimals=4), - torch.tensor( - [ - [0.3168, 0.1903, 0.3495, 0.1435], - [0.2207, 0.1502, 0.5156, 0.1135], - [0.2161, 0.0961, 0.5802, 0.1075], - [0.2814, 0.1054, 0.4294, 0.1838], - [0.2184, 0.1506, 0.5177, 0.1132], - [0.2893, 0.0944, 0.4583, 0.1580], - [0.2751, 0.1170, 0.4380, 0.1699], - ] - ), - ) - - -def test_decode_with_multi_label(model_output, inputs): - torch.manual_seed(42) - model = SequenceClassificationModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - pooler=POOLER, - multi_label=True, - ) - decoded = model.decode(inputs=inputs, outputs=model_output) - assert isinstance(decoded, dict) - assert set(decoded) == {"labels", "probabilities"} - labels = 
decoded["labels"] - assert labels.shape == (inputs["input_ids"].shape[0], NUM_CLASSES) - torch.testing.assert_close( - labels, - torch.tensor( - [ - [0, 0, 0, 0], - [0, 0, 1, 0], - [0, 0, 1, 0], - [0, 0, 1, 0], - [0, 0, 1, 0], - [0, 0, 1, 0], - [0, 0, 1, 0], - ] - ), - ) - probabilities = decoded["probabilities"] - assert probabilities.shape == (inputs["input_ids"].shape[0], NUM_CLASSES) - torch.testing.assert_close( - probabilities.round(decimals=4), - torch.tensor( - [ - [0.4268, 0.3090, 0.4510, 0.2522], - [0.4109, 0.3218, 0.6197, 0.2640], - [0.4009, 0.2294, 0.6424, 0.2498], - [0.4769, 0.2546, 0.5818, 0.3733], - [0.4010, 0.3158, 0.6134, 0.2576], - [0.4642, 0.2204, 0.5785, 0.3211], - [0.4707, 0.2745, 0.5861, 0.3545], - ] - ), - ) - - -@pytest.fixture -def batch(inputs, targets): - return inputs, targets - - -def test_training_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.3899686336517334)) - - -def test_validation_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.3899686336517334)) - - -def test_test_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.3899686336517334)) - - -def test_base_model_named_parameters(model): - base_model_named_parameters = dict(model.base_model_named_parameters()) - assert set(base_model_named_parameters) == { - "model.pooler.dense.bias", - "model.encoder.layer.0.intermediate.dense.weight", - "model.encoder.layer.0.intermediate.dense.bias", - "model.encoder.layer.1.attention.output.dense.weight", - 
"model.encoder.layer.1.attention.output.LayerNorm.weight", - "model.encoder.layer.1.attention.self.query.weight", - "model.encoder.layer.1.output.dense.weight", - "model.encoder.layer.0.output.dense.bias", - "model.encoder.layer.1.intermediate.dense.bias", - "model.encoder.layer.1.attention.self.value.bias", - "model.encoder.layer.0.attention.output.dense.weight", - "model.encoder.layer.0.attention.self.query.bias", - "model.encoder.layer.0.attention.self.value.bias", - "model.encoder.layer.1.output.dense.bias", - "model.encoder.layer.1.attention.self.query.bias", - "model.encoder.layer.1.attention.output.LayerNorm.bias", - "model.encoder.layer.0.attention.self.query.weight", - "model.encoder.layer.0.attention.output.LayerNorm.bias", - "model.encoder.layer.0.attention.self.key.bias", - "model.encoder.layer.1.intermediate.dense.weight", - "model.encoder.layer.1.output.LayerNorm.bias", - "model.encoder.layer.1.output.LayerNorm.weight", - "model.encoder.layer.0.attention.self.key.weight", - "model.encoder.layer.1.attention.output.dense.bias", - "model.encoder.layer.0.attention.output.dense.bias", - "model.embeddings.LayerNorm.bias", - "model.encoder.layer.0.attention.self.value.weight", - "model.encoder.layer.0.attention.output.LayerNorm.weight", - "model.embeddings.token_type_embeddings.weight", - "model.encoder.layer.0.output.LayerNorm.weight", - "model.embeddings.position_embeddings.weight", - "model.encoder.layer.1.attention.self.key.bias", - "model.embeddings.LayerNorm.weight", - "model.encoder.layer.0.output.LayerNorm.bias", - "model.encoder.layer.1.attention.self.key.weight", - "model.pooler.dense.weight", - "model.encoder.layer.0.output.dense.weight", - "model.embeddings.word_embeddings.weight", - "model.encoder.layer.1.attention.self.value.weight", - } - - -def test_task_named_parameters(model): - task_named_parameters = dict(model.task_named_parameters()) - assert set(task_named_parameters) == { - "classifier.weight", - "pooler.pooler.missing_embeddings", - 
"classifier.bias", - } - - -def test_configure_optimizers_with_warmup(): - model = SequenceClassificationModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - ) - model.trainer = Trainer(max_epochs=10) - optimizers_and_schedulers = model.configure_optimizers() - assert len(optimizers_and_schedulers) == 2 - optimizers, schedulers = optimizers_and_schedulers - assert len(optimizers) == 1 - assert len(schedulers) == 1 - optimizer = optimizers[0] - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults["lr"] == 1e-05 - assert optimizer.defaults["weight_decay"] == 0.01 - assert optimizer.defaults["eps"] == 1e-08 - - scheduler = schedulers[0] - assert isinstance(scheduler, dict) - assert set(scheduler) == {"scheduler", "interval"} - assert isinstance(scheduler["scheduler"], LambdaLR) - - -def test_configure_optimizers_with_task_learning_rate(monkeypatch): - model = SequenceClassificationModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - learning_rate=1e-5, - task_learning_rate=1e-3, - # disable warmup to make sure the scheduler is not added which would set the learning rate - # to 0 - warmup_proportion=0.0, - ) - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert len(optimizer.param_groups) == 2 - # base model parameters - param_group = optimizer.param_groups[0] - assert len(param_group["params"]) == 39 - assert param_group["lr"] == 1e-5 - # classifier head parameters - param_group = optimizer.param_groups[1] - assert len(param_group["params"]) == 2 - assert param_group["lr"] == 1e-3 - # ensure that all parameters are covered - assert set(optimizer.param_groups[0]["params"] + optimizer.param_groups[1]["params"]) == set( - model.parameters() - ) - - -def test_freeze_base_model(monkeypatch, inputs, targets): - model = SequenceClassificationModelWithPooler( - 
model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - freeze_base_model=True, - # disable warmup to make sure the scheduler is not added which would set the learning rate - # to 0 - warmup_proportion=0.0, - ) - base_model_params = [param for name, param in model.base_model_named_parameters()] - task_params = [param for name, param in model.task_named_parameters()] - assert len(base_model_params) + len(task_params) == len(list(model.parameters())) - for param in base_model_params: - assert not param.requires_grad - for param in task_params: - assert param.requires_grad diff --git a/tests/models/test_sequence_pair_similarity_model_with_pooler.py b/tests/models/test_sequence_pair_similarity_model_with_pooler.py deleted file mode 100644 index 3e6871a11..000000000 --- a/tests/models/test_sequence_pair_similarity_model_with_pooler.py +++ /dev/null @@ -1,326 +0,0 @@ -from typing import Dict - -import pytest -import torch -from pytorch_lightning import Trainer -from torch import LongTensor, tensor -from torch.optim.lr_scheduler import LambdaLR -from transformers.modeling_outputs import SequenceClassifierOutput - -from pie_modules.models import SequencePairSimilarityModelWithPooler -from pie_modules.models.sequence_classification_with_pooler import OutputType -from tests.models import trunc_number - - -@pytest.fixture -def inputs() -> Dict[str, LongTensor]: - result_dict = { - "encoding": { - "input_ids": tensor( - [ - [101, 1262, 1131, 1771, 140, 119, 102], - [101, 1262, 1131, 1771, 140, 119, 102], - [101, 1262, 1131, 1771, 140, 119, 102], - [101, 1262, 1131, 1771, 140, 119, 102], - ] - ), - "token_type_ids": tensor( - [ - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - ] - ), - "attention_mask": tensor( - [ - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - ] - ), - }, - "encoding_pair": { - "input_ids": tensor( - [ - [101, 3162, 7871, 1117, 5855, 119, 
102], - [101, 3162, 7871, 1117, 5855, 119, 102], - [101, 3162, 7871, 1117, 5855, 119, 102], - [101, 3162, 7871, 1117, 5855, 119, 102], - ] - ), - "token_type_ids": tensor( - [ - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - ] - ), - "attention_mask": tensor( - [ - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - ] - ), - }, - "pooler_start_indices": tensor([[2], [2], [4], [4]]), - "pooler_end_indices": tensor([[3], [3], [5], [5]]), - "pooler_pair_start_indices": tensor([[1], [3], [1], [3]]), - "pooler_pair_end_indices": tensor([[2], [5], [2], [5]]), - } - - return result_dict - - -@pytest.fixture -def targets() -> Dict[str, LongTensor]: - return {"scores": tensor([0.0, 0.0, 0.0, 0.0])} - - -@pytest.fixture -def model() -> SequencePairSimilarityModelWithPooler: - torch.manual_seed(42) - result = SequencePairSimilarityModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - ) - return result - - -def test_model(model): - assert model is not None - named_parameters = dict(model.named_parameters()) - parameter_means = {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "model.embeddings.word_embeddings.weight": 0.0031152, - "model.embeddings.position_embeddings.weight": 5.5e-05, - "model.embeddings.token_type_embeddings.weight": -0.0015419, - "model.embeddings.LayerNorm.weight": 1.312345, - "model.embeddings.LayerNorm.bias": -0.0294608, - "model.encoder.layer.0.attention.self.query.weight": -0.0003949, - "model.encoder.layer.0.attention.self.query.bias": 0.0185744, - "model.encoder.layer.0.attention.self.key.weight": 0.0003863, - "model.encoder.layer.0.attention.self.key.bias": 0.0020557, - "model.encoder.layer.0.attention.self.value.weight": 4.22e-05, - "model.encoder.layer.0.attention.self.value.bias": 0.0065417, - "model.encoder.layer.0.attention.output.dense.weight": 3.01e-05, - 
"model.encoder.layer.0.attention.output.dense.bias": 0.0007209, - "model.encoder.layer.0.attention.output.LayerNorm.weight": 1.199831, - "model.encoder.layer.0.attention.output.LayerNorm.bias": 0.0608714, - "model.encoder.layer.0.intermediate.dense.weight": -0.0011731, - "model.encoder.layer.0.intermediate.dense.bias": -0.1219958, - "model.encoder.layer.0.output.dense.weight": -0.0002212, - "model.encoder.layer.0.output.dense.bias": -0.0013031, - "model.encoder.layer.0.output.LayerNorm.weight": 1.2419648, - "model.encoder.layer.0.output.LayerNorm.bias": 0.005295, - "model.encoder.layer.1.attention.self.query.weight": -0.0007321, - "model.encoder.layer.1.attention.self.query.bias": -0.0358397, - "model.encoder.layer.1.attention.self.key.weight": 0.0001333, - "model.encoder.layer.1.attention.self.key.bias": 0.0045062, - "model.encoder.layer.1.attention.self.value.weight": 0.0001012, - "model.encoder.layer.1.attention.self.value.bias": -0.0007094, - "model.encoder.layer.1.attention.output.dense.weight": -2.43e-05, - "model.encoder.layer.1.attention.output.dense.bias": 0.0041446, - "model.encoder.layer.1.attention.output.LayerNorm.weight": 1.0377343, - "model.encoder.layer.1.attention.output.LayerNorm.bias": 0.0443237, - "model.encoder.layer.1.intermediate.dense.weight": -0.001344, - "model.encoder.layer.1.intermediate.dense.bias": -0.1247257, - "model.encoder.layer.1.output.dense.weight": -5.32e-05, - "model.encoder.layer.1.output.dense.bias": 0.000677, - "model.encoder.layer.1.output.LayerNorm.weight": 1.017162, - "model.encoder.layer.1.output.LayerNorm.bias": -0.0474442, - "model.pooler.dense.weight": 0.0001295, - "model.pooler.dense.bias": -0.0052078, - "pooler.missing_embeddings": 0.0812017, - } - assert parameter_means == parameter_means_expected - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -@pytest.fixture -def model_output(model, inputs) -> OutputType: - # set seed to make sure the output is deterministic - 
torch.manual_seed(42) - return model(inputs) - - -def test_forward_logits(model_output, inputs): - assert isinstance(model_output, SequenceClassifierOutput) - - logits = model_output.logits - - torch.testing.assert_close( - logits, - torch.tensor( - [0.5338148474693298, 0.5866107940673828, 0.5076886415481567, 0.5946245789527893] - ), - ) - - -def test_decode(model, model_output, inputs): - decoded = model.decode(inputs=inputs, outputs=model_output) - assert isinstance(decoded, dict) - assert set(decoded) == {"scores"} - scores = decoded["scores"] - torch.testing.assert_close( - scores, - torch.tensor( - [0.5338148474693298, 0.5866107940673828, 0.5076886415481567, 0.5946245789527893] - ), - ) - - -@pytest.fixture -def batch(inputs, targets): - return inputs, targets - - -def test_training_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(0.8145309686660767)) - - -def test_validation_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(0.8145309686660767)) - - -def test_test_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(0.8145309686660767)) - - -def test_base_model_named_parameters(model): - base_model_named_parameters = dict(model.base_model_named_parameters()) - assert set(base_model_named_parameters) == { - "model.pooler.dense.bias", - "model.encoder.layer.0.intermediate.dense.weight", - "model.encoder.layer.0.intermediate.dense.bias", - "model.encoder.layer.1.attention.output.dense.weight", - "model.encoder.layer.1.attention.output.LayerNorm.weight", - 
"model.encoder.layer.1.attention.self.query.weight", - "model.encoder.layer.1.output.dense.weight", - "model.encoder.layer.0.output.dense.bias", - "model.encoder.layer.1.intermediate.dense.bias", - "model.encoder.layer.1.attention.self.value.bias", - "model.encoder.layer.0.attention.output.dense.weight", - "model.encoder.layer.0.attention.self.query.bias", - "model.encoder.layer.0.attention.self.value.bias", - "model.encoder.layer.1.output.dense.bias", - "model.encoder.layer.1.attention.self.query.bias", - "model.encoder.layer.1.attention.output.LayerNorm.bias", - "model.encoder.layer.0.attention.self.query.weight", - "model.encoder.layer.0.attention.output.LayerNorm.bias", - "model.encoder.layer.0.attention.self.key.bias", - "model.encoder.layer.1.intermediate.dense.weight", - "model.encoder.layer.1.output.LayerNorm.bias", - "model.encoder.layer.1.output.LayerNorm.weight", - "model.encoder.layer.0.attention.self.key.weight", - "model.encoder.layer.1.attention.output.dense.bias", - "model.encoder.layer.0.attention.output.dense.bias", - "model.embeddings.LayerNorm.bias", - "model.encoder.layer.0.attention.self.value.weight", - "model.encoder.layer.0.attention.output.LayerNorm.weight", - "model.embeddings.token_type_embeddings.weight", - "model.encoder.layer.0.output.LayerNorm.weight", - "model.embeddings.position_embeddings.weight", - "model.encoder.layer.1.attention.self.key.bias", - "model.embeddings.LayerNorm.weight", - "model.encoder.layer.0.output.LayerNorm.bias", - "model.encoder.layer.1.attention.self.key.weight", - "model.pooler.dense.weight", - "model.encoder.layer.0.output.dense.weight", - "model.embeddings.word_embeddings.weight", - "model.encoder.layer.1.attention.self.value.weight", - } - - -def test_task_named_parameters(model): - task_named_parameters = dict(model.task_named_parameters()) - assert set(task_named_parameters) == { - "pooler.missing_embeddings", - } - - -def test_configure_optimizers_with_warmup(): - model = 
SequencePairSimilarityModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - ) - model.trainer = Trainer(max_epochs=10) - optimizers_and_schedulers = model.configure_optimizers() - assert len(optimizers_and_schedulers) == 2 - optimizers, schedulers = optimizers_and_schedulers - assert len(optimizers) == 1 - assert len(schedulers) == 1 - optimizer = optimizers[0] - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults["lr"] == 1e-05 - assert optimizer.defaults["weight_decay"] == 0.01 - assert optimizer.defaults["eps"] == 1e-08 - - scheduler = schedulers[0] - assert isinstance(scheduler, dict) - assert set(scheduler) == {"scheduler", "interval"} - assert isinstance(scheduler["scheduler"], LambdaLR) - - -def test_configure_optimizers_with_task_learning_rate(monkeypatch): - model = SequencePairSimilarityModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - learning_rate=1e-5, - task_learning_rate=1e-3, - # disable warmup to make sure the scheduler is not added which would set the learning rate - # to 0 - warmup_proportion=0.0, - ) - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert len(optimizer.param_groups) == 2 - # base model parameters - param_group = optimizer.param_groups[0] - assert len(param_group["params"]) == 39 - assert param_group["lr"] == 1e-5 - # classifier head parameters - there is just the default embedding (which is not used) - param_group = optimizer.param_groups[1] - assert len(param_group["params"]) == 1 - assert param_group["lr"] == 1e-3 - # ensure that all parameters are covered - assert set(optimizer.param_groups[0]["params"] + optimizer.param_groups[1]["params"]) == set( - model.parameters() - ) - - -def test_freeze_base_model(monkeypatch, inputs, targets): - model = SequencePairSimilarityModelWithPooler( - model_name_or_path="prajjwal1/bert-tiny", - freeze_base_model=True, - # disable warmup to 
make sure the scheduler is not added which would set the learning rate - # to 0 - warmup_proportion=0.0, - ) - base_model_params = [param for name, param in model.base_model_named_parameters()] - task_params = [param for name, param in model.task_named_parameters()] - assert len(base_model_params) + len(task_params) == len(list(model.parameters())) - for param in base_model_params: - assert not param.requires_grad - for param in task_params: - assert param.requires_grad diff --git a/tests/models/test_simple_generative.py b/tests/models/test_simple_generative.py deleted file mode 100644 index 147ce1f67..000000000 --- a/tests/models/test_simple_generative.py +++ /dev/null @@ -1,439 +0,0 @@ -import math -from typing import List, Optional - -import pytest -import torch -from pytorch_lightning import Trainer -from torch.optim import Optimizer - -from pie_modules.models import SimpleGenerativeModel -from pie_modules.models.common import TESTING, VALIDATION -from pie_modules.taskmodules import TextToTextTaskModule -from tests.models import trunc_number - -MODEL_ID = "google/t5-efficient-tiny-nl2" - - -@pytest.fixture(scope="module") -def taskmodule(): - return TextToTextTaskModule( - tokenizer_name_or_path=MODEL_ID, - document_type="pie_modules.documents.TextDocumentWithAbstractiveSummary", - target_layer="abstractive_summary", - target_annotation_type="pie_modules.annotations.AbstractiveSummary", - tokenized_document_type="pie_modules.documents.TokenDocumentWithAbstractiveSummary", - text_metric_type="torchmetrics.text.ROUGEScore", - ) - - -@pytest.fixture(scope="module") -def model(taskmodule) -> SimpleGenerativeModel: - return SimpleGenerativeModel( - base_model={ - "_type_": "transformers.AutoModelForSeq2SeqLM", - "pretrained_model_name_or_path": MODEL_ID, - }, - # only use predictions for metrics in test stage to cover all cases (default is all stages) - metric_call_predict=[TESTING], - taskmodule_config=taskmodule.config, - # use a strange learning rate to make sure 
it is passed through - learning_rate=13e-3, - optimizer_type="torch.optim.Adam", - ) - - -def test_model(model): - assert model is not None - assert model.model is not None - assert model.taskmodule is not None - named_parameters = dict(model.named_parameters()) - parameter_means = {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "model.shared.weight": -0.3906954, - "model.encoder.block.0.layer.0.SelfAttention.q.weight": 2.15e-05, - "model.encoder.block.0.layer.0.SelfAttention.k.weight": -0.0015166, - "model.encoder.block.0.layer.0.SelfAttention.v.weight": -0.0018635, - "model.encoder.block.0.layer.0.SelfAttention.o.weight": 0.000866, - "model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": -2.8229351, - "model.encoder.block.0.layer.0.layer_norm.weight": 0.226491, - "model.encoder.block.0.layer.1.DenseReluDense.wi.weight": 0.0034651, - "model.encoder.block.0.layer.1.DenseReluDense.wo.weight": 0.00017, - "model.encoder.block.0.layer.1.layer_norm.weight": 1.2047424, - "model.encoder.block.1.layer.0.SelfAttention.q.weight": -7.88e-05, - "model.encoder.block.1.layer.0.SelfAttention.k.weight": -0.0017292, - "model.encoder.block.1.layer.0.SelfAttention.v.weight": -0.0025692, - "model.encoder.block.1.layer.0.SelfAttention.o.weight": 0.000484, - "model.encoder.block.1.layer.0.layer_norm.weight": 0.4024209, - "model.encoder.block.1.layer.1.DenseReluDense.wi.weight": 0.0012148, - "model.encoder.block.1.layer.1.DenseReluDense.wo.weight": -0.000555, - "model.encoder.block.1.layer.1.layer_norm.weight": 1.9719848, - "model.encoder.final_layer_norm.weight": 1.3045949, - "model.decoder.block.0.layer.0.SelfAttention.q.weight": 4.21e-05, - "model.decoder.block.0.layer.0.SelfAttention.k.weight": 0.0006944, - "model.decoder.block.0.layer.0.SelfAttention.v.weight": -0.0001296, - "model.decoder.block.0.layer.0.SelfAttention.o.weight": 0.0020978, - 
"model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": -0.5869011, - "model.decoder.block.0.layer.0.layer_norm.weight": 0.1958751, - "model.decoder.block.0.layer.1.EncDecAttention.q.weight": 7.8e-06, - "model.decoder.block.0.layer.1.EncDecAttention.k.weight": -0.0001409, - "model.decoder.block.0.layer.1.EncDecAttention.v.weight": -0.0010971, - "model.decoder.block.0.layer.1.EncDecAttention.o.weight": 0.0026751, - "model.decoder.block.0.layer.1.layer_norm.weight": 0.0658893, - "model.decoder.block.0.layer.2.DenseReluDense.wi.weight": 0.0012591, - "model.decoder.block.0.layer.2.DenseReluDense.wo.weight": 0.0033682, - "model.decoder.block.0.layer.2.layer_norm.weight": 2.9871673, - "model.decoder.block.1.layer.0.SelfAttention.q.weight": 6.16e-05, - "model.decoder.block.1.layer.0.SelfAttention.k.weight": 0.0004128, - "model.decoder.block.1.layer.0.SelfAttention.v.weight": -0.0003878, - "model.decoder.block.1.layer.0.SelfAttention.o.weight": -0.0040457, - "model.decoder.block.1.layer.0.layer_norm.weight": 1.1167399, - "model.decoder.block.1.layer.1.EncDecAttention.q.weight": -0.0001246, - "model.decoder.block.1.layer.1.EncDecAttention.k.weight": 0.0013352, - "model.decoder.block.1.layer.1.EncDecAttention.v.weight": -0.0024415, - "model.decoder.block.1.layer.1.EncDecAttention.o.weight": -9.83e-05, - "model.decoder.block.1.layer.1.layer_norm.weight": 0.0755381, - "model.decoder.block.1.layer.2.DenseReluDense.wi.weight": -0.0045786, - "model.decoder.block.1.layer.2.DenseReluDense.wo.weight": 0.0101685, - "model.decoder.block.1.layer.2.layer_norm.weight": 7.3835659, - "model.decoder.final_layer_norm.weight": 0.8366433, - } - assert parameter_means == parameter_means_expected - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -def test_model_without_taskmodule(caplog): - with caplog.at_level("WARNING"): - model = SimpleGenerativeModel( - base_model={ - "_type_": "transformers.AutoModelForSeq2SeqLM", - 
"pretrained_model_name_or_path": MODEL_ID, - }, - ) - assert model is not None - assert caplog.messages == [ - "No taskmodule is available, so no metrics are set up. Please provide a taskmodule_config " - "to enable metrics for stages ['train', 'val', 'test'].", - "No taskmodule is available, so no generation config will be created. Consider setting " - "taskmodule_config to a valid taskmodule config to use specific setup for generation.", - ] - - -def test_missing_base_model_and_type(): - with pytest.raises(ValueError) as excinfo: - SimpleGenerativeModel() - assert ( - str(excinfo.value) - == "Either base_model or base_model_type must be provided. If base_model is " - "not provided, base_model_type must be a valid model type, " - "e.g. 'transformers.AutoModelForSeq2SeqLM'." - ) - - -def test_model_with_deprecated_base_model_setup(caplog, taskmodule): - with caplog.at_level("WARNING"): - model = SimpleGenerativeModel( - base_model_type="transformers.AutoModelForSeq2SeqLM", - base_model_config=dict(pretrained_model_name_or_path=MODEL_ID), - taskmodule_config=taskmodule.config, - ) - assert model is not None - assert caplog.messages == [ - "The base_model_type and base_model_config arguments are deprecated. Please use base_model. 
" - "You can use the following code to create the base_model argument: " - "base_model = {'_type_': base_model_type, **base_model_config}", - ] - - -@pytest.fixture(scope="module") -def batch(model): - inputs = { - "input_ids": torch.tensor( - [ - [100, 19, 3, 9, 794, 1708, 1, 0, 0, 0, 0, 0], - [100, 19, 430, 794, 1708, 84, 19, 3, 9, 720, 1200, 1], - ] - ), - "attention_mask": torch.tensor( - [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] - ), - } - - targets = { - "labels": torch.tensor([[3, 9, 1708, 1, 0], [3, 9, 1200, 1708, 1]]), - "decoder_attention_mask": torch.tensor([[1, 1, 1, 1, 0], [1, 1, 1, 1, 1]]), - } - - return inputs, targets - - -def test_batch(batch, taskmodule): - inputs, targets = batch - input_ids_tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - assert input_ids_tokens == [ - [ - "▁This", - "▁is", - "▁", - "a", - "▁test", - "▁document", - "", - "", - "", - "", - "", - "", - ], - [ - "▁This", - "▁is", - "▁another", - "▁test", - "▁document", - "▁which", - "▁is", - "▁", - "a", - "▁bit", - "▁longer", - "", - ], - ] - - labels_tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(labels) for labels in targets["labels"] - ] - assert labels_tokens == [ - ["▁", "a", "▁document", "", ""], - ["▁", "a", "▁longer", "▁document", ""], - ] - - -def test_training_step(batch, model): - model.train() - torch.manual_seed(42) - metric = model._get_metric(VALIDATION, batch_idx=0) - metric.reset() - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(8.98222827911377)) - - metric_values = metric.compute() - metric_values_float = {key: value.item() for key, value in metric_values.items()} - - # we do not collect metrics during training, so all entries should be NaN - assert len(metric_values_float) > 0 - assert all([math.isnan(value) for value in metric_values_float.values()]) - - model.on_train_epoch_end() - 
- -def test_validation_step(batch, model): - model.eval() - torch.manual_seed(42) - metric = model._get_metric(VALIDATION, batch_idx=0) - metric.reset() - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(10.146586418151855)) - - metric_values = metric.compute() - metric_values_float = {key: value.item() for key, value in metric_values.items()} - assert metric_values_float == { - "rouge1_fmeasure": 0.0, - "rouge1_precision": 0.0, - "rouge1_recall": 0.0, - "rouge2_fmeasure": 0.0, - "rouge2_precision": 0.0, - "rouge2_recall": 0.0, - "rougeL_fmeasure": 0.0, - "rougeL_precision": 0.0, - "rougeL_recall": 0.0, - "rougeLsum_fmeasure": 0.0, - "rougeLsum_precision": 0.0, - "rougeLsum_recall": 0.0, - } - - model.on_validation_epoch_end() - - -def test_test_step(batch, model): - model.eval() - torch.manual_seed(42) - metric = model._get_metric(TESTING, batch_idx=0) - metric.reset() - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(10.146586418151855)) - - metric_values = metric.compute() - metric_values_float = {key: value.item() for key, value in metric_values.items()} - assert metric_values_float == { - "rouge1_fmeasure": 0.1111111119389534, - "rouge1_precision": 0.06666667014360428, - "rouge1_recall": 0.3333333432674408, - "rouge2_fmeasure": 0.0, - "rouge2_precision": 0.0, - "rouge2_recall": 0.0, - "rougeL_fmeasure": 0.1111111119389534, - "rougeL_precision": 0.06666667014360428, - "rougeL_recall": 0.3333333432674408, - "rougeLsum_fmeasure": 0.0555555559694767, - "rougeLsum_precision": 0.03333333507180214, - "rougeLsum_recall": 0.1666666716337204, - } - - model.on_test_epoch_end() - - -def test_predict_step(batch, model): - model.eval() - torch.manual_seed(42) - predictions = model.predict_step(batch, batch_idx=0) - labels = predictions["labels"] - assert labels is not None - torch.testing.assert_close( - labels, - torch.tensor( - [ - 
[32099, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [ - 32099, - 19, - 3, - 9, - 248, - 194, - 12, - 129, - 25, - 708, - 5, - 37, - 166, - 794, - 1708, - 19, - 3, - 9, - 794, - ], - ] - ), - ) - - predicted_tokens = [ - model.taskmodule.tokenizer.convert_ids_to_tokens(label) for label in labels - ] - assert predicted_tokens == [ - [ - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ], - [ - "", - "▁is", - "▁", - "a", - "▁great", - "▁way", - "▁to", - "▁get", - "▁you", - "▁started", - ".", - "▁The", - "▁first", - "▁test", - "▁document", - "▁is", - "▁", - "a", - "▁test", - ], - ] - - -@pytest.fixture(scope="module") -def optimizer(model): - return model.configure_optimizers() - - -def test_optimizer(optimizer): - assert optimizer is not None - assert isinstance(optimizer, torch.optim.Adam) - assert optimizer.defaults["lr"] == 13e-3 - assert len(optimizer.param_groups) == 1 - param_group = optimizer.param_groups[0] - assert len(param_group["params"]) == 47 - - -def _assert_optimizer( - actual: Optimizer, - expected: Optimizer, - allow_mismatching_param_group_keys: Optional[List[str]] = None, -): - allow_mismatching_param_group_key_set = set(allow_mismatching_param_group_keys or []) - assert actual is not None - assert isinstance(actual, type(expected)) - assert actual.defaults == expected.defaults - assert len(actual.param_groups) == len(expected.param_groups) - for actual_param_group, expected_param_group in zip( - actual.param_groups, expected.param_groups - ): - actual_keys = set(actual_param_group) - allow_mismatching_param_group_key_set - expected_keys = set(expected_param_group) - allow_mismatching_param_group_key_set - assert actual_keys == expected_keys - for key in actual_keys: - # also include the key in the comparison to have it in the assertion error message - assert (key, actual_param_group[key]) == (key, expected_param_group[key]) - - -def 
test_configure_optimizers_with_warmup(model, optimizer): - warmup_proportion_backup_value = model.warmup_proportion - scheduler_name_backup_value = model.scheduler_name - model.warmup_proportion = 0.1 - model.scheduler_name = "linear" - model.trainer = Trainer(max_epochs=10) - optimizer_and_schedular = model.configure_optimizers() - assert optimizer_and_schedular is not None - assert isinstance(optimizer_and_schedular, tuple) - assert len(optimizer_and_schedular) == 2 - optimizers, schedulers = optimizer_and_schedular - assert len(optimizers) == 1 - _assert_optimizer( - optimizers[0], optimizer, allow_mismatching_param_group_keys=["initial_lr", "lr"] - ) - assert len(schedulers) == 1 - assert set(schedulers[0]) == {"scheduler", "interval"} - scheduler = schedulers[0]["scheduler"] - assert isinstance(scheduler, torch.optim.lr_scheduler.LambdaLR) - assert scheduler.optimizer is optimizers[0] - assert scheduler.base_lrs == [13e-3] - - model.warmup_proportion = warmup_proportion_backup_value - model.scheduler_name = scheduler_name_backup_value diff --git a/tests/models/test_simple_sequence_classification.py b/tests/models/test_simple_sequence_classification.py deleted file mode 100644 index 1fe740cd3..000000000 --- a/tests/models/test_simple_sequence_classification.py +++ /dev/null @@ -1,550 +0,0 @@ -from typing import Dict - -import pytest -import torch -from pytorch_lightning import Trainer -from torch.optim.lr_scheduler import LambdaLR -from transformers.modeling_outputs import SequenceClassifierOutput - -from pie_modules.models import SimpleSequenceClassificationModel -from pie_modules.models.simple_sequence_classification import OutputType -from tests.models import trunc_number - -NUM_CLASSES = 4 - - -@pytest.fixture -def inputs() -> Dict[str, torch.LongTensor]: - result_dict = { - "input_ids": torch.tensor( - [ - [ - 101, - 28998, - 13832, - 3121, - 2340, - 138, - 28996, - 1759, - 1120, - 28999, - 139, - 28997, - 119, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 
0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 144, - 28996, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 146, - 119, - 102, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 144, - 28996, - 1759, - 1120, - 145, - 119, - 1262, - 1771, - 28999, - 146, - 28997, - 119, - 102, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 13832, - 3121, - 2340, - 144, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 28998, - 146, - 28996, - 119, - 102, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 150, - 28996, - 1759, - 1120, - 28999, - 151, - 28997, - 119, - 1262, - 1122, - 1771, - 152, - 119, - 102, - ], - [ - 101, - 1752, - 5650, - 119, - 13832, - 3121, - 2340, - 150, - 1759, - 1120, - 151, - 119, - 1262, - 28998, - 1122, - 28996, - 1771, - 28999, - 152, - 28997, - 119, - 102, - ], - [ - 101, - 1752, - 5650, - 119, - 13832, - 3121, - 2340, - 150, - 1759, - 1120, - 151, - 119, - 1262, - 28999, - 1122, - 28997, - 1771, - 28998, - 152, - 28996, - 119, - 102, - ], - ] - ).to(torch.long), - "attention_mask": torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ).to(torch.long), - } - - return result_dict - - -@pytest.fixture -def targets() -> Dict[str, torch.LongTensor]: - return {"labels": torch.tensor([0, 1, 2, 3, 1, 2, 3]).to(torch.long)} - - -@pytest.fixture -def model() -> SimpleSequenceClassificationModel: - torch.manual_seed(42) - result = SimpleSequenceClassificationModel( - 
model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - ) - return result - - -def test_model(model): - assert model is not None - named_parameters = dict(model.named_parameters()) - parameter_means = {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "model.bert.embeddings.word_embeddings.weight": 0.0031152, - "model.bert.embeddings.position_embeddings.weight": 5.5e-05, - "model.bert.embeddings.token_type_embeddings.weight": -0.0015419, - "model.bert.embeddings.LayerNorm.weight": 1.312345, - "model.bert.embeddings.LayerNorm.bias": -0.0294608, - "model.bert.encoder.layer.0.attention.self.query.weight": -0.0003949, - "model.bert.encoder.layer.0.attention.self.query.bias": 0.0185744, - "model.bert.encoder.layer.0.attention.self.key.weight": 0.0003863, - "model.bert.encoder.layer.0.attention.self.key.bias": 0.0020557, - "model.bert.encoder.layer.0.attention.self.value.weight": 4.22e-05, - "model.bert.encoder.layer.0.attention.self.value.bias": 0.0065417, - "model.bert.encoder.layer.0.attention.output.dense.weight": 3.01e-05, - "model.bert.encoder.layer.0.attention.output.dense.bias": 0.0007209, - "model.bert.encoder.layer.0.attention.output.LayerNorm.weight": 1.199831, - "model.bert.encoder.layer.0.attention.output.LayerNorm.bias": 0.0608714, - "model.bert.encoder.layer.0.intermediate.dense.weight": -0.0011731, - "model.bert.encoder.layer.0.intermediate.dense.bias": -0.1219958, - "model.bert.encoder.layer.0.output.dense.weight": -0.0002212, - "model.bert.encoder.layer.0.output.dense.bias": -0.0013031, - "model.bert.encoder.layer.0.output.LayerNorm.weight": 1.2419648, - "model.bert.encoder.layer.0.output.LayerNorm.bias": 0.005295, - "model.bert.encoder.layer.1.attention.self.query.weight": -0.0007321, - "model.bert.encoder.layer.1.attention.self.query.bias": -0.0358397, - "model.bert.encoder.layer.1.attention.self.key.weight": 0.0001333, - "model.bert.encoder.layer.1.attention.self.key.bias": 
0.0045062, - "model.bert.encoder.layer.1.attention.self.value.weight": 0.0001012, - "model.bert.encoder.layer.1.attention.self.value.bias": -0.0007094, - "model.bert.encoder.layer.1.attention.output.dense.weight": -2.43e-05, - "model.bert.encoder.layer.1.attention.output.dense.bias": 0.0041446, - "model.bert.encoder.layer.1.attention.output.LayerNorm.weight": 1.0377343, - "model.bert.encoder.layer.1.attention.output.LayerNorm.bias": 0.0443237, - "model.bert.encoder.layer.1.intermediate.dense.weight": -0.001344, - "model.bert.encoder.layer.1.intermediate.dense.bias": -0.1247257, - "model.bert.encoder.layer.1.output.dense.weight": -5.32e-05, - "model.bert.encoder.layer.1.output.dense.bias": 0.000677, - "model.bert.encoder.layer.1.output.LayerNorm.weight": 1.017162, - "model.bert.encoder.layer.1.output.LayerNorm.bias": -0.0474442, - "model.bert.pooler.dense.weight": 0.0001295, - "model.bert.pooler.dense.bias": -0.0052078, - "model.classifier.weight": 0.0005538, - "model.classifier.bias": 0.0, - } - assert parameter_means == parameter_means_expected - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -@pytest.fixture -def model_output(model, inputs) -> OutputType: - # set seed to make sure the output is deterministic - torch.manual_seed(42) - return model(inputs) - - -def test_forward(model_output, inputs): - batch_size = inputs["input_ids"].shape[0] - assert isinstance(model_output, SequenceClassifierOutput) - assert set(model_output) == {"logits"} - logits = model_output["logits"] - - assert logits.shape == (batch_size, NUM_CLASSES) - - torch.testing.assert_close( - logits, - torch.tensor( - [ - [ - 0.16545572876930237, - 0.17544983327388763, - -0.011048287153244019, - 0.05337674915790558, - ], - [ - 0.14748695492744446, - 0.16249355673789978, - -0.058017998933792114, - 0.025398850440979004, - ], - [ - 0.14271709322929382, - 0.16188383102416992, - -0.061113521456718445, - 0.026494741439819336, - ], - [ - 0.15641027688980103, - 
0.17225395143032074, - -0.05567866563796997, - 0.022433891892433167, - ], - [ - 0.15785054862499237, - 0.16935551166534424, - -0.054724469780921936, - 0.012338697910308838, - ], - [ - 0.16152460873126984, - 0.17789196968078613, - -0.053754448890686035, - 0.008724510669708252, - ], - [ - 0.16836002469062805, - 0.17842254042625427, - -0.052499815821647644, - 0.006823211908340454, - ], - ] - ), - ) - - -def test_decode(model, model_output, inputs): - decoded = model.decode(inputs=inputs, outputs=model_output) - assert isinstance(decoded, dict) - assert set(decoded) == {"labels", "probabilities"} - labels = decoded["labels"] - assert labels.shape == (inputs["input_ids"].shape[0],) - torch.testing.assert_close( - labels, - torch.tensor([1, 1, 1, 1, 1, 1, 1]), - ) - probabilities = decoded["probabilities"] - assert probabilities.shape == (inputs["input_ids"].shape[0], NUM_CLASSES) - torch.testing.assert_close( - probabilities, - torch.tensor( - [ - [ - 0.2672215402126312, - 0.26990556716918945, - 0.22398385405540466, - 0.23888900876045227, - ], - [ - 0.26922059059143066, - 0.27329114079475403, - 0.21920911967754364, - 0.23827917873859406, - ], - [0.2684398889541626, 0.2736346125602722, 0.21893969178199768, 0.23898591101169586], - [0.2703087329864502, 0.2746255099773407, 0.21865077316761017, 0.23641489446163177], - [0.2713961601257324, 0.2745365798473358, 0.21942369639873505, 0.2346435934305191], - [ - 0.27165648341178894, - 0.27613937854766846, - 0.21904107928276062, - 0.23316311836242676, - ], - [0.2730168402194977, 0.2757779359817505, 0.21891282498836517, 0.23229233920574188], - ] - ), - ) - - -@pytest.fixture -def batch(inputs, targets): - return inputs, targets - - -def test_training_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.4069921970367432)) - - -def test_validation_step(batch, model): 
- # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.4069921970367432)) - - -def test_test_step(batch, model): - # set the seed to make sure the loss is deterministic - torch.manual_seed(42) - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.4069921970367432)) - - -def test_base_model_named_parameters(model): - base_model_named_parameters = dict(model.base_model_named_parameters()) - assert set(base_model_named_parameters) == { - "model.bert.pooler.dense.bias", - "model.bert.encoder.layer.0.intermediate.dense.weight", - "model.bert.encoder.layer.0.intermediate.dense.bias", - "model.bert.encoder.layer.1.attention.output.dense.weight", - "model.bert.encoder.layer.1.attention.output.LayerNorm.weight", - "model.bert.encoder.layer.1.attention.self.query.weight", - "model.bert.encoder.layer.1.output.dense.weight", - "model.bert.encoder.layer.0.output.dense.bias", - "model.bert.encoder.layer.1.intermediate.dense.bias", - "model.bert.encoder.layer.1.attention.self.value.bias", - "model.bert.encoder.layer.0.attention.output.dense.weight", - "model.bert.encoder.layer.0.attention.self.query.bias", - "model.bert.encoder.layer.0.attention.self.value.bias", - "model.bert.encoder.layer.1.output.dense.bias", - "model.bert.encoder.layer.1.attention.self.query.bias", - "model.bert.encoder.layer.1.attention.output.LayerNorm.bias", - "model.bert.encoder.layer.0.attention.self.query.weight", - "model.bert.encoder.layer.0.attention.output.LayerNorm.bias", - "model.bert.encoder.layer.0.attention.self.key.bias", - "model.bert.encoder.layer.1.intermediate.dense.weight", - "model.bert.encoder.layer.1.output.LayerNorm.bias", - "model.bert.encoder.layer.1.output.LayerNorm.weight", - "model.bert.encoder.layer.0.attention.self.key.weight", - 
"model.bert.encoder.layer.1.attention.output.dense.bias", - "model.bert.encoder.layer.0.attention.output.dense.bias", - "model.bert.embeddings.LayerNorm.bias", - "model.bert.encoder.layer.0.attention.self.value.weight", - "model.bert.encoder.layer.0.attention.output.LayerNorm.weight", - "model.bert.embeddings.token_type_embeddings.weight", - "model.bert.encoder.layer.0.output.LayerNorm.weight", - "model.bert.embeddings.position_embeddings.weight", - "model.bert.encoder.layer.1.attention.self.key.bias", - "model.bert.embeddings.LayerNorm.weight", - "model.bert.encoder.layer.0.output.LayerNorm.bias", - "model.bert.encoder.layer.1.attention.self.key.weight", - "model.bert.pooler.dense.weight", - "model.bert.encoder.layer.0.output.dense.weight", - "model.bert.embeddings.word_embeddings.weight", - "model.bert.encoder.layer.1.attention.self.value.weight", - } - - -def test_task_named_parameters(model): - task_named_parameters = dict(model.task_named_parameters()) - assert set(task_named_parameters) == { - "model.classifier.weight", - "model.classifier.bias", - } - - -def test_configure_optimizers_with_warmup(): - model = SimpleSequenceClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - ) - model.trainer = Trainer(max_epochs=10) - optimizers_and_schedulers = model.configure_optimizers() - assert len(optimizers_and_schedulers) == 2 - optimizers, schedulers = optimizers_and_schedulers - assert len(optimizers) == 1 - assert len(schedulers) == 1 - optimizer = optimizers[0] - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults["lr"] == 1e-05 - assert optimizer.defaults["weight_decay"] == 0.01 - assert optimizer.defaults["eps"] == 1e-08 - - scheduler = schedulers[0] - assert isinstance(scheduler, dict) - assert set(scheduler) == {"scheduler", "interval"} - assert isinstance(scheduler["scheduler"], LambdaLR) - - -def test_configure_optimizers_with_task_learning_rate(monkeypatch): - 
model = SimpleSequenceClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - learning_rate=1e-5, - task_learning_rate=1e-3, - # disable warmup to make sure the scheduler is not added which would set the learning rate - # to 0 - warmup_proportion=0.0, - ) - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert len(optimizer.param_groups) == 2 - # base model parameters - param_group = optimizer.param_groups[0] - assert len(param_group["params"]) == 39 - assert param_group["lr"] == 1e-5 - # classifier head parameters - param_group = optimizer.param_groups[1] - assert len(param_group["params"]) == 2 - assert param_group["lr"] == 1e-3 - # ensure that all parameters are covered - assert set(optimizer.param_groups[0]["params"] + optimizer.param_groups[1]["params"]) == set( - model.parameters() - ) - - -def test_freeze_base_model(monkeypatch, inputs, targets): - model = SimpleSequenceClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - freeze_base_model=True, - # disable warmup to make sure the scheduler is not added which would set the learning rate - # to 0 - warmup_proportion=0.0, - ) - base_model_params = [param for name, param in model.base_model_named_parameters()] - task_params = [param for name, param in model.task_named_parameters()] - assert len(base_model_params) + len(task_params) == len(list(model.parameters())) - for param in base_model_params: - assert not param.requires_grad - for param in task_params: - assert param.requires_grad - - -def test_base_model_named_parameters_wrong_prefix(monkeypatch): - model = SimpleSequenceClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - base_model_prefix="wrong_prefix", - ) - with pytest.raises(ValueError) as excinfo: - model.base_model_named_parameters() - assert ( - str(excinfo.value) - == "Base model with prefix 
'wrong_prefix' not found in BertForSequenceClassification" - ) diff --git a/tests/models/test_simple_token_classification.py b/tests/models/test_simple_token_classification.py deleted file mode 100644 index f6b190f6a..000000000 --- a/tests/models/test_simple_token_classification.py +++ /dev/null @@ -1,453 +0,0 @@ -import pytest -import torch - -from pie_modules.models import SimpleTokenClassificationModel -from pie_modules.models.common import TESTING, TRAINING, VALIDATION -from pie_modules.taskmodules import LabeledSpanExtractionByTokenClassificationTaskModule -from tests import _config_to_str -from tests.models import trunc_number - -CONFIGS = [{}] -CONFIG_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIG_DICT.keys()) -def config_str(request): - return request.param - - -@pytest.fixture(scope="module") -def config(config_str): - return CONFIG_DICT[config_str] - - -@pytest.fixture -def taskmodule_config(): - return { - "taskmodule_type": "LabeledSpanExtractionByTokenClassificationTaskModule", - "tokenizer_name_or_path": "bert-base-cased", - "span_annotation": "entities", - "partition_annotation": None, - "label_pad_id": -100, - "labels": ["ORG", "PER"], - "include_ill_formed_predictions": True, - "tokenize_kwargs": None, - "pad_kwargs": None, - "combine_token_scores_method": "mean", - "log_precision_recall_metrics": True, - } - - -def test_taskmodule_config(documents, taskmodule_config): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - span_annotation="entities", - tokenizer_name_or_path=tokenizer_name_or_path, - ) - taskmodule.prepare(documents) - assert taskmodule.config == taskmodule_config - - -def test_batch(documents, batch, taskmodule_config): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule.from_config( - taskmodule_config - ) - encodings = taskmodule.encode(documents, encode_target=True) - # just take the first 4 
encodings - batch_from_documents = taskmodule.collate(encodings[:4]) - - inputs, targets = batch - inputs_from_documents, targets_from_documents = batch_from_documents - assert set(inputs) == set(inputs_from_documents) - torch.testing.assert_close(inputs["input_ids"], inputs_from_documents["input_ids"]) - torch.testing.assert_close(inputs["attention_mask"], inputs_from_documents["attention_mask"]) - torch.testing.assert_close(targets, targets_from_documents) - - -@pytest.fixture -def batch(): - inputs = { - "input_ids": torch.tensor( - [ - [101, 138, 1423, 5650, 119, 102, 0, 0, 0, 0, 0, 0], - [101, 13832, 3121, 2340, 138, 1759, 1120, 139, 119, 102, 0, 0], - [101, 13832, 3121, 2340, 140, 1105, 141, 119, 102, 0, 0, 0], - [101, 1752, 5650, 119, 13832, 3121, 2340, 142, 1105, 143, 119, 102], - ] - ).to(torch.long), - "attention_mask": torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ), - "special_tokens_mask": torch.tensor( - [ - [1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - ] - ), - } - targets = { - "labels": torch.tensor( - [ - [-100, 0, 0, 0, 0, -100, -100, -100, -100, -100, -100, -100], - [-100, 3, 4, 4, 4, 0, 0, 1, 0, -100, -100, -100], - [-100, 3, 4, 4, 4, 0, 1, 0, -100, -100, -100, -100], - [-100, 0, 0, 0, 3, 4, 4, 4, 0, 1, 0, -100], - ] - ) - } - return inputs, targets - - -@pytest.fixture -def model(monkeypatch, batch, config, taskmodule_config) -> SimpleTokenClassificationModel: - torch.manual_seed(42) - model = SimpleTokenClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=5, - taskmodule_config=taskmodule_config, - metric_stages=["val", "test"], - ) - return model - - -def test_model(model): - assert model is not None - named_parameters = dict(model.named_parameters()) - parameter_means 
= {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "model.bert.embeddings.word_embeddings.weight": 0.0031152, - "model.bert.embeddings.position_embeddings.weight": 5.5e-05, - "model.bert.embeddings.token_type_embeddings.weight": -0.0015419, - "model.bert.embeddings.LayerNorm.weight": 1.312345, - "model.bert.embeddings.LayerNorm.bias": -0.0294608, - "model.bert.encoder.layer.0.attention.self.query.weight": -0.0003949, - "model.bert.encoder.layer.0.attention.self.query.bias": 0.0185744, - "model.bert.encoder.layer.0.attention.self.key.weight": 0.0003863, - "model.bert.encoder.layer.0.attention.self.key.bias": 0.0020557, - "model.bert.encoder.layer.0.attention.self.value.weight": 4.22e-05, - "model.bert.encoder.layer.0.attention.self.value.bias": 0.0065417, - "model.bert.encoder.layer.0.attention.output.dense.weight": 3.01e-05, - "model.bert.encoder.layer.0.attention.output.dense.bias": 0.0007209, - "model.bert.encoder.layer.0.attention.output.LayerNorm.weight": 1.199831, - "model.bert.encoder.layer.0.attention.output.LayerNorm.bias": 0.0608714, - "model.bert.encoder.layer.0.intermediate.dense.weight": -0.0011731, - "model.bert.encoder.layer.0.intermediate.dense.bias": -0.1219958, - "model.bert.encoder.layer.0.output.dense.weight": -0.0002212, - "model.bert.encoder.layer.0.output.dense.bias": -0.0013031, - "model.bert.encoder.layer.0.output.LayerNorm.weight": 1.2419648, - "model.bert.encoder.layer.0.output.LayerNorm.bias": 0.005295, - "model.bert.encoder.layer.1.attention.self.query.weight": -0.0007321, - "model.bert.encoder.layer.1.attention.self.query.bias": -0.0358397, - "model.bert.encoder.layer.1.attention.self.key.weight": 0.0001333, - "model.bert.encoder.layer.1.attention.self.key.bias": 0.0045062, - "model.bert.encoder.layer.1.attention.self.value.weight": 0.0001012, - "model.bert.encoder.layer.1.attention.self.value.bias": -0.0007094, - "model.bert.encoder.layer.1.attention.output.dense.weight": 
-2.43e-05, - "model.bert.encoder.layer.1.attention.output.dense.bias": 0.0041446, - "model.bert.encoder.layer.1.attention.output.LayerNorm.weight": 1.0377343, - "model.bert.encoder.layer.1.attention.output.LayerNorm.bias": 0.0443237, - "model.bert.encoder.layer.1.intermediate.dense.weight": -0.001344, - "model.bert.encoder.layer.1.intermediate.dense.bias": -0.1247257, - "model.bert.encoder.layer.1.output.dense.weight": -5.32e-05, - "model.bert.encoder.layer.1.output.dense.bias": 0.000677, - "model.bert.encoder.layer.1.output.LayerNorm.weight": 1.017162, - "model.bert.encoder.layer.1.output.LayerNorm.bias": -0.0474442, - "model.classifier.weight": 0.0005138, - "model.classifier.bias": 0.0, - } - assert parameter_means == parameter_means_expected - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -def test_forward(batch, model): - inputs, targets = batch - batch_size, seq_len = inputs["input_ids"].shape - num_classes = model.config["num_classes"] - - # set seed to make sure the output is deterministic - torch.manual_seed(42) - output = model.forward(inputs) - assert set(output) == {"logits"} - logits = output["logits"] - assert logits.shape == (batch_size, seq_len, num_classes) - # check the first batch entry - torch.testing.assert_close( - logits[0], - torch.tensor( - [ - [ - -0.13442197442054749, - -0.06983129680156708, - 0.17513807117938995, - -0.24002864956855774, - 0.08871676027774811, - ], - [ - -0.032687313854694366, - -0.2071131318807602, - 0.10695032775402069, - -0.05829116329550743, - -0.21174949407577515, - ], - [ - -0.17153336107730865, - -0.2230629324913025, - -0.11457862704992294, - 0.03658870607614517, - -0.242639422416687, - ], - [ - -0.07552017271518707, - -0.20950022339820862, - 0.041016221046447754, - -0.13453879952430725, - -0.09942213445901871, - ], - [ - -0.19299760460853577, - -0.2081824392080307, - 0.20880958437919617, - -0.028745755553245544, - -0.14375154674053192, - ], - [ - -0.20548884570598602, - 
-0.17012161016464233, - 0.0647551566362381, - -0.090476393699646, - -0.1362220048904419, - ], - [ - -0.09553629904985428, - -0.1303575187921524, - 0.2995688021183014, - -0.04689876735210419, - -0.17737819254398346, - ], - [ - -0.030023209750652313, - -0.12308696657419205, - 0.2582213580608368, - -0.04085375368595123, - -0.16487300395965576, - ], - [ - -0.04765648394823074, - -0.18347612023353577, - 0.24941012263298035, - 0.022468380630016327, - -0.19706891477108002, - ], - [ - -0.09828818589448929, - -0.18449409306049347, - 0.2711920738220215, - 0.044708192348480225, - -0.15743865072727203, - ], - [ - -0.13639293611049652, - -0.16482298076152802, - 0.3018418848514557, - 0.0815257728099823, - -0.15574774146080017, - ], - [ - -0.14846578240394592, - -0.17294010519981384, - 0.31513816118240356, - 0.10425455123186111, - -0.16388092935085297, - ], - ] - ), - ) - - # check the sums per sequence - torch.testing.assert_close( - logits.sum(1), - torch.tensor( - [ - [ - -1.3690122365951538, - -2.0469894409179688, - 2.1774630546569824, - -0.35028770565986633, - -1.7614551782608032, - ], - [ - -0.892522394657135, - -1.3144632577896118, - 2.683281898498535, - -1.4629074335098267, - -3.3516180515289307, - ], - [ - -1.3936796188354492, - 0.21844607591629028, - 4.501010417938232, - -0.15485064685344696, - -2.651848316192627, - ], - [ - -1.7388781309127808, - -0.7211084365844727, - 3.463726043701172, - -0.2992384433746338, - -2.65508770942688, - ], - ] - ), - ) - - -def test_training_step_and_on_epoch_end(batch, model, config): - assert model._get_metric(TRAINING) is None - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.730902075767517)) - - model.on_train_epoch_end() - - -def test_validation_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(VALIDATION) - metric.reset() - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, 
torch.tensor(1.730902075767517)) - metric_values = {k: v.item() for k, v in metric.compute().items()} - assert metric_values == { - "span/ORG/f1": 0.0, - "span/ORG/precision": 0.0, - "span/ORG/recall": 0.0, - "span/PER/f1": 0.0, - "span/PER/precision": 0.0, - "span/PER/recall": 0.0, - "span/macro/f1": 0.0, - "span/macro/precision": 0.0, - "span/macro/recall": 0.0, - "span/micro/f1": 0.0, - "span/micro/precision": 0.0, - "span/micro/recall": 0.0, - "token/macro/f1": 0.0, - "token/micro/f1": 0.0, - "token/macro/precision": 0.0, - "token/macro/recall": 0.0, - "token/micro/precision": 0.0, - "token/micro/recall": 0.0, - } - - model.on_validation_epoch_end() - - -def test_test_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(TESTING) - metric.reset() - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - torch.testing.assert_close(loss, torch.tensor(1.730902075767517)) - metric_values = {k: v.item() for k, v in metric.compute().items()} - assert metric_values == { - "span/ORG/f1": 0.0, - "span/ORG/precision": 0.0, - "span/ORG/recall": 0.0, - "span/PER/f1": 0.0, - "span/PER/precision": 0.0, - "span/PER/recall": 0.0, - "span/macro/f1": 0.0, - "span/macro/precision": 0.0, - "span/macro/recall": 0.0, - "span/micro/f1": 0.0, - "span/micro/precision": 0.0, - "span/micro/recall": 0.0, - "token/macro/f1": 0.0, - "token/micro/f1": 0.0, - "token/macro/precision": 0.0, - "token/macro/recall": 0.0, - "token/micro/precision": 0.0, - "token/micro/recall": 0.0, - } - - model.on_test_epoch_end() - - -@pytest.mark.parametrize("test_step", [False, True]) -def test_predict_and_predict_step(model, batch, config, test_step): - torch.manual_seed(42) - if test_step: - predictions = model.predict_step(batch, batch_idx=0, dataloader_idx=0) - else: - predictions = model.predict(batch[0]) - assert set(predictions) == {"labels", "probabilities"} - - assert predictions["labels"].shape == batch[1]["labels"].shape - torch.testing.assert_close( - 
predictions["labels"], - torch.tensor( - [ - [-100, 2, 3, 2, 2, -100, -100, -100, -100, -100, -100, -100], - [-100, 2, 2, 2, 2, 2, 2, 2, 2, -100, -100, -100], - [-100, 2, 2, 2, 2, 2, 2, 2, -100, -100, -100, -100], - [-100, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, -100], - ] - ), - ) - torch.testing.assert_close( - # just check the first two batch entries - predictions["probabilities"][:2].round(decimals=4), - torch.tensor( - [ - [ - [0.1792, 0.1912, 0.2443, 0.1613, 0.2240], - [0.2083, 0.1750, 0.2395, 0.2030, 0.1742], - [0.1934, 0.1837, 0.2047, 0.2381, 0.1801], - [0.2034, 0.1779, 0.2285, 0.1917, 0.1986], - [0.1752, 0.1725, 0.2618, 0.2065, 0.1840], - [0.1805, 0.1870, 0.2365, 0.2025, 0.1935], - [0.1844, 0.1781, 0.2738, 0.1936, 0.1700], - [0.1958, 0.1784, 0.2612, 0.1937, 0.1711], - [0.1941, 0.1694, 0.2612, 0.2082, 0.1671], - [0.1831, 0.1680, 0.2650, 0.2113, 0.1726], - [0.1740, 0.1691, 0.2697, 0.2164, 0.1707], - [0.1713, 0.1672, 0.2723, 0.2205, 0.1687], - ], - [ - [0.1654, 0.1989, 0.2729, 0.1542, 0.2086], - [0.1787, 0.1511, 0.3093, 0.1968, 0.1641], - [0.1888, 0.1966, 0.2365, 0.2081, 0.1700], - [0.2092, 0.1935, 0.2428, 0.2034, 0.1511], - [0.2275, 0.1784, 0.2546, 0.1856, 0.1539], - [0.2254, 0.1959, 0.2377, 0.1873, 0.1536], - [0.2177, 0.1879, 0.2485, 0.1975, 0.1484], - [0.2227, 0.1906, 0.2541, 0.1906, 0.1420], - [0.2080, 0.2098, 0.2667, 0.1764, 0.1391], - [0.1815, 0.2015, 0.2600, 0.1852, 0.1718], - [0.1672, 0.1883, 0.3065, 0.1773, 0.1607], - [0.1750, 0.1846, 0.2911, 0.1862, 0.1630], - ], - ] - ), - ) - - -def test_configure_optimizers(model): - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.Adam) - assert optimizer.defaults["lr"] == 1e-05 - assert len(optimizer.param_groups) == 1 - assert len(optimizer.param_groups[0]["params"]) > 0 - assert set(optimizer.param_groups[0]["params"]) == set(model.parameters()) diff --git a/tests/models/test_span_tuple_classification.py b/tests/models/test_span_tuple_classification.py 
deleted file mode 100644 index f984fdbdb..000000000 --- a/tests/models/test_span_tuple_classification.py +++ /dev/null @@ -1,549 +0,0 @@ -import pytest -import torch -from pytorch_lightning import Trainer -from torch import tensor - -from pie_modules.models import SpanTupleClassificationModel -from pie_modules.models.common import TESTING, TRAINING, VALIDATION -from pie_modules.taskmodules import RESpanPairClassificationTaskModule -from tests import _config_to_str - -CONFIGS = [{}] -CONFIG_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} -NUM_CLASSES = 4 - - -@pytest.fixture(scope="module", params=CONFIG_DICT.keys()) -def config_str(request): - return request.param - - -@pytest.fixture(scope="module") -def config(config_str): - return CONFIG_DICT[config_str] - - -@pytest.fixture -def taskmodule_config(): - return { - "taskmodule_type": "RESpanPairClassificationTaskModule", - "tokenizer_name_or_path": "bert-base-cased", - "relation_annotation": "relations", - "no_relation_label": "no_relation", - "partition_annotation": None, - "tokenize_kwargs": None, - "create_candidate_relations": False, - "create_candidate_relations_kwargs": None, - "labels": ["org:founded_by", "per:employee_of", "per:founder"], - "entity_labels": ["ORG", "PER"], - "add_type_to_marker": True, - "log_first_n_examples": 0, - "collect_statistics": False, - } - - -def test_taskmodule_config(documents, taskmodule_config): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RESpanPairClassificationTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - ) - taskmodule.prepare(documents) - assert taskmodule.config == taskmodule_config - assert len(taskmodule.id_to_label) == NUM_CLASSES - - -def test_batch(documents, batch, taskmodule_config): - taskmodule = RESpanPairClassificationTaskModule.from_config(taskmodule_config) - encodings = taskmodule.encode(documents, encode_target=True, as_dataset=True) - batch_from_documents = 
taskmodule.collate(encodings[:4]) - - inputs, targets = batch - inputs_from_documents, targets_from_documents = batch_from_documents - assert set(inputs) == set(inputs_from_documents) - for key in inputs: - torch.testing.assert_close(inputs[key], inputs_from_documents[key]) - assert set(targets) == set(targets_from_documents) - for key in targets: - torch.testing.assert_close(targets[key], targets_from_documents[key]) - - -@pytest.fixture -def batch(): - inputs = { - "input_ids": tensor( - [ - [ - 101, - 28996, - 13832, - 3121, - 2340, - 138, - 28998, - 1759, - 1120, - 28999, - 139, - 28997, - 119, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28996, - 13832, - 3121, - 2340, - 144, - 28998, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 28999, - 146, - 28997, - 119, - 102, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28996, - 13832, - 3121, - 2340, - 150, - 28998, - 1759, - 1120, - 28999, - 151, - 28997, - 119, - 1262, - 28996, - 1122, - 28998, - 1771, - 28999, - 152, - 28997, - 119, - 102, - ], - ] - ), - "attention_mask": tensor( - [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ), - "span_start_indices": tensor([[1, 9, 0, 0], [4, 12, 18, 0], [4, 12, 17, 21]]), - "span_end_indices": tensor([[7, 12, 0, 0], [10, 15, 21, 0], [10, 15, 20, 24]]), - "tuple_indices": tensor( - [[[0, 1], [-1, -1], [-1, -1]], [[0, 1], [0, 2], [2, 1]], [[0, 1], [2, 3], [3, 2]]] - ), - "tuple_indices_mask": tensor( - [[True, False, False], [True, True, True], [True, True, True]] - ), - } - targets = {"labels": tensor([[2, -100, -100], [2, 3, 1], [2, 3, 1]])} - return inputs, targets - - -@pytest.fixture -def model(batch, config, taskmodule_config) -> SpanTupleClassificationModel: - 
torch.manual_seed(42) - model = SpanTupleClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - taskmodule_config=taskmodule_config, - metric_stages=["val", "test"], - **config, - ) - return model - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -def test_freeze_base_model(): - model = SpanTupleClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - freeze_base_model=True, - ) - - base_model_params = dict(model.model.named_parameters(prefix="model")) - assert len(base_model_params) > 0 - for param in base_model_params.values(): - assert not param.requires_grad - task_params = { - name: param for name, param in model.named_parameters() if name not in base_model_params - } - assert len(task_params) > 0 - for param in task_params.values(): - assert param.requires_grad - - -def test_tune_base_model(): - model = SpanTupleClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=5, - ) - base_model_params = dict(model.model.named_parameters(prefix="model")) - assert len(base_model_params) > 0 - for param in base_model_params.values(): - assert param.requires_grad - task_params = { - name: param for name, param in model.named_parameters() if name not in base_model_params - } - assert len(task_params) > 0 - for param in task_params.values(): - assert param.requires_grad - - -@pytest.mark.parametrize( - "span_embedding_mode", ["start_and_end_token", "start_token", "end_token"] -) -@pytest.mark.parametrize( - "tuple_embedding_mode", ["concat", "multiply2_and_concat", "index_0", "index_1"] -) -def test_forward_embeddings(batch, taskmodule_config, span_embedding_mode, tuple_embedding_mode): - torch.manual_seed(42) - simple_model = SpanTupleClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - # disable the tuple mlp to allow for checking the intermediate embeddings via the indices - 
tuple_entry_hidden_dim=None, - taskmodule_config=taskmodule_config, - span_embedding_mode=span_embedding_mode, - tuple_embedding_mode=tuple_embedding_mode, - ) - - inputs, targets = batch - batch_size, seq_len = inputs["input_ids"].shape - - # set seed to make sure the output is deterministic - torch.manual_seed(42) - # return embeddings to check the logits - output = simple_model.forward(inputs, return_embeddings=True) - assert set(output) == {"logits", "last_hidden_state", "span_embeddings", "tuple_embeddings"} - logits_flat = output["logits"] - assert len(logits_flat.shape) == 2 - assert logits_flat.shape[-1] == NUM_CLASSES - - # check span_embeddings: they should be the entries of last_hidden_state at the - # span_start_indices and span_end_indices - for batch_idx in range(batch_size): - for j, (start, end) in enumerate( - zip(inputs["span_start_indices"][batch_idx], inputs["span_end_indices"][batch_idx]) - ): - current_expected_span_embedding_list = [] - if simple_model.span_embedding_mode == "start_and_end_token": - current_expected_span_embedding_list.append( - output["last_hidden_state"][batch_idx, start] - ) - current_expected_span_embedding_list.append( - output["last_hidden_state"][batch_idx, end] - ) - elif simple_model.span_embedding_mode == "start_token": - current_expected_span_embedding_list.append( - output["last_hidden_state"][batch_idx, start] - ) - elif simple_model.span_embedding_mode == "end_token": - current_expected_span_embedding_list.append( - output["last_hidden_state"][batch_idx, end] - ) - else: - raise ValueError( - f"Unknown span_embedding_mode: {simple_model.span_embedding_mode}" - ) - expected_current_span_embedding = torch.concat( - current_expected_span_embedding_list, dim=-1 - ) - current_span_embeddings = output["span_embeddings"][batch_idx, j] - torch.testing.assert_close(current_span_embeddings, expected_current_span_embedding) - - # check tuple_embeddings: they should be the entries of span_embeddings at the tuple_indices - 
tuple_idx = 0 - for batch_idx in range(batch_size): - for indices, is_valid in zip( - inputs["tuple_indices"][batch_idx], inputs["tuple_indices_mask"][batch_idx] - ): - if is_valid: - current_expected_tuple_embedding_list = [ - output["span_embeddings"][batch_idx, idx] for idx in indices - ] - if simple_model.tuple_embedding_mode == "concat": - expected_current_tuple_embedding = torch.concat( - current_expected_tuple_embedding_list, dim=-1 - ) - elif simple_model.tuple_embedding_mode == "multiply2_and_concat": - expected_current_tuple_embedding = torch.cat( - [ - current_expected_tuple_embedding_list[0] - * current_expected_tuple_embedding_list[1], - current_expected_tuple_embedding_list[0], - current_expected_tuple_embedding_list[1], - ], - dim=-1, - ) - elif simple_model.tuple_embedding_mode.startswith("index_"): - idx = int(simple_model.tuple_embedding_mode.split("_")[1]) - expected_current_tuple_embedding = current_expected_tuple_embedding_list[idx] - else: - raise ValueError( - f"Unknown tuple_embedding_mode: {simple_model.tuple_embedding_mode}" - ) - current_tuple_embedding = output["tuple_embeddings"][tuple_idx] - torch.testing.assert_close( - current_tuple_embedding, expected_current_tuple_embedding - ) - tuple_idx += 1 - - -def test_forward_logits(batch, model): - inputs, targets = batch - - # set seed to make sure the output is deterministic - torch.manual_seed(42) - # return embeddings to check the logits - output = model.forward(inputs) - assert set(output) == {"logits"} - logits_flat = output["logits"] - assert len(logits_flat.shape) == 2 - assert logits_flat.shape[-1] == NUM_CLASSES - # check the actual logits - torch.testing.assert_close( - logits_flat, - tensor( - [ - [ - -0.23075447976589203, - 0.08129829168319702, - -0.26441076397895813, - 0.3208361268043518, - ], - [ - -0.2247302085161209, - 0.21453489363193512, - -0.20609508454799652, - 0.2984844148159027, - ], - [ - -0.0552724152803421, - 0.18319237232208252, - -0.14115819334983826, - 
0.23137536644935608, - ], - [ - -0.2897184491157532, - 0.17462071776390076, - -0.12004873156547546, - 0.1817789375782013, - ], - [ - -0.3101494312286377, - 0.18245069682598114, - -0.13525372743606567, - 0.28625163435935974, - ], - [ - -0.33728304505348206, - 0.22038179636001587, - -0.0482308566570282, - 0.25237396359443665, - ], - [ - -0.3835912048816681, - 0.20549766719341278, - 0.15333643555641174, - 0.23370930552482605, - ], - ] - ), - ) - - -def test_step(batch, model, config): - torch.manual_seed(42) - loss = model._step("train", batch) - assert loss is not None - if config == {}: - torch.testing.assert_close(loss, torch.tensor(1.3911350965499878)) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_training_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(TRAINING, batch_idx=0) - assert metric is None - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - if config == {}: - torch.testing.assert_close(loss, torch.tensor(1.3911350965499878)) - else: - raise ValueError(f"Unknown config: {config}") - - model.on_train_epoch_end() - - -def test_validation_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(VALIDATION, batch_idx=0) - metric.reset() - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - metric_values = {k: v.item() for k, v in metric.compute().items()} - if config == {}: - torch.testing.assert_close(loss, torch.tensor(1.3911350965499878)) - assert metric_values == { - "macro/f1": 0.14814814925193787, - "micro/f1": 0.2857142984867096, - "no_relation/f1": 0.0, - "org:founded_by/f1": 0.0, - "per:employee_of/f1": 0.0, - "per:founder/f1": 0.4444444477558136, - } - else: - raise ValueError(f"Unknown config: {config}") - - model.on_validation_epoch_end() - - -def test_test_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(TESTING, batch_idx=0) - metric.reset() - loss = model.test_step(batch, batch_idx=0) - assert loss is not 
None - metric_values = {k: v.item() for k, v in metric.compute().items()} - if config == {}: - torch.testing.assert_close(loss, torch.tensor(1.3911350965499878)) - assert metric_values == { - "macro/f1": 0.14814814925193787, - "micro/f1": 0.2857142984867096, - "no_relation/f1": 0.0, - "org:founded_by/f1": 0.0, - "per:employee_of/f1": 0.0, - "per:founder/f1": 0.4444444477558136, - } - else: - raise ValueError(f"Unknown config: {config}") - - model.on_test_epoch_end() - - -@pytest.mark.parametrize("test_step", [False, True]) -def test_predict_and_predict_step(model, batch, config, test_step): - torch.manual_seed(42) - if test_step: - predictions = model.predict_step(batch, batch_idx=0, dataloader_idx=0) - else: - predictions = model.predict(batch[0]) - - assert set(predictions) == {"labels", "probabilities"} - labels = predictions["labels"] - assert labels.shape == batch[1]["labels"].shape - probabilities = predictions["probabilities"] - if config == {}: - torch.testing.assert_close(labels, tensor([[3, -100, -100], [3, 3, 3], [3, 3, 3]])) - torch.testing.assert_close( - probabilities.round(decimals=4), - tensor( - [ - [ - [0.1973, 0.2695, 0.1907, 0.3425], - [-1.0000, -1.0000, -1.0000, -1.0000], - [-1.0000, -1.0000, -1.0000, -1.0000], - ], - [ - [0.1902, 0.2951, 0.1938, 0.3209], - [0.2213, 0.2809, 0.2031, 0.2947], - [0.1859, 0.2958, 0.2203, 0.2979], - ], - [ - [0.1772, 0.2900, 0.2111, 0.3217], - [0.1699, 0.2968, 0.2269, 0.3064], - [0.1571, 0.2831, 0.2687, 0.2912], - ], - ], - ), - ) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_configure_optimizers(model): - model.trainer = Trainer(max_epochs=10) - optimizer_and_schedular = model.configure_optimizers() - assert optimizer_and_schedular is not None - optimizers, schedulers = optimizer_and_schedular - - assert len(optimizers) == 1 - optimizer = optimizers[0] - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults["lr"] == 1e-05 - assert optimizer.defaults["weight_decay"] == 
0.01 - assert optimizer.defaults["eps"] == 1e-08 - - assert len(schedulers) == 1 - scheduler = schedulers[0] - assert isinstance(scheduler["scheduler"], torch.optim.lr_scheduler.LambdaLR) - - -def test_configure_optimizers_with_task_learning_rate(): - model = SpanTupleClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=NUM_CLASSES, - warmup_proportion=0.0, - task_learning_rate=1e-4, - ) - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert len(optimizer.param_groups) == 2 - # check that all parameters are in the optimizer - assert set(optimizer.param_groups[0]["params"]) | set( - optimizer.param_groups[1]["params"] - ) == set(model.parameters()) - - # base model parameters - param_group = optimizer.param_groups[0] - assert param_group["lr"] == 1e-05 - assert len(param_group["params"]) == 39 - - # task parameters - param_group = optimizer.param_groups[1] - assert param_group["lr"] == 1e-04 - assert len(param_group["params"]) == 6 diff --git a/tests/models/test_token_classification_with_seq2seq_encoder_and_crf.py b/tests/models/test_token_classification_with_seq2seq_encoder_and_crf.py deleted file mode 100644 index 5b31b2f13..000000000 --- a/tests/models/test_token_classification_with_seq2seq_encoder_and_crf.py +++ /dev/null @@ -1,639 +0,0 @@ -import pytest -import torch -from pytorch_lightning import Trainer - -from pie_modules.models import TokenClassificationModelWithSeq2SeqEncoderAndCrf -from pie_modules.models.common import TESTING, TRAINING, VALIDATION -from pie_modules.taskmodules import LabeledSpanExtractionByTokenClassificationTaskModule -from tests import _config_to_str -from tests.models import trunc_number - -CONFIGS = [{}, {"use_crf": False}] -CONFIG_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIG_DICT.keys()) -def config_str(request): - return request.param - - -@pytest.fixture(scope="module") 
-def config(config_str): - return CONFIG_DICT[config_str] - - -@pytest.fixture -def taskmodule_config(): - return { - "taskmodule_type": "LabeledSpanExtractionByTokenClassificationTaskModule", - "tokenizer_name_or_path": "bert-base-cased", - "span_annotation": "entities", - "partition_annotation": None, - "label_pad_id": -100, - "labels": ["ORG", "PER"], - "include_ill_formed_predictions": True, - "combine_token_scores_method": "mean", - "tokenize_kwargs": None, - "pad_kwargs": None, - "log_precision_recall_metrics": True, - } - - -def test_taskmodule_config(documents, taskmodule_config): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - span_annotation="entities", - tokenizer_name_or_path=tokenizer_name_or_path, - ) - taskmodule.prepare(documents) - assert taskmodule.config == taskmodule_config - - -def test_batch(documents, batch, taskmodule_config): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule.from_config( - taskmodule_config - ) - encodings = taskmodule.encode(documents, encode_target=True, as_dataset=True) - batch_from_documents = taskmodule.collate(encodings[:4]) - - inputs, targets = batch - inputs_from_documents, targets_from_documents = batch_from_documents - torch.testing.assert_close(inputs["input_ids"], inputs_from_documents["input_ids"]) - torch.testing.assert_close(inputs["attention_mask"], inputs_from_documents["attention_mask"]) - torch.testing.assert_close(targets, targets_from_documents) - - -@pytest.fixture -def batch(): - inputs = { - "input_ids": torch.tensor( - [ - [101, 138, 1423, 5650, 119, 102, 0, 0, 0, 0, 0, 0], - [101, 13832, 3121, 2340, 138, 1759, 1120, 139, 119, 102, 0, 0], - [101, 13832, 3121, 2340, 140, 1105, 141, 119, 102, 0, 0, 0], - [101, 1752, 5650, 119, 13832, 3121, 2340, 142, 1105, 143, 119, 102], - ] - ).to(torch.long), - "attention_mask": torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - 
[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ), - "special_tokens_mask": torch.tensor( - [ - [1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - ] - ), - } - targets = { - "labels": torch.tensor( - [ - [-100, 0, 0, 0, 0, -100, -100, -100, -100, -100, -100, -100], - [-100, 3, 4, 4, 4, 0, 0, 1, 0, -100, -100, -100], - [-100, 3, 4, 4, 4, 0, 1, 0, -100, -100, -100, -100], - [-100, 0, 0, 0, 3, 4, 4, 4, 0, 1, 0, -100], - ] - ) - } - return inputs, targets - - -@pytest.fixture -def model(batch, config, taskmodule_config) -> TokenClassificationModelWithSeq2SeqEncoderAndCrf: - seq2seq_dict = { - "type": "linear", - "out_features": 10, - } - torch.manual_seed(42) - model = TokenClassificationModelWithSeq2SeqEncoderAndCrf( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=5, - seq2seq_encoder=seq2seq_dict, - taskmodule_config=taskmodule_config, - metric_stages=["val", "test"], - **config, - ) - return model - - -def test_model(model, config): - assert model is not None - named_parameters = dict(model.named_parameters()) - parameter_means = {k: trunc_number(v.mean().item(), 7) for k, v in named_parameters.items()} - parameter_means_expected = { - "model.embeddings.word_embeddings.weight": 0.0031152, - "model.embeddings.position_embeddings.weight": 5.5e-05, - "model.embeddings.token_type_embeddings.weight": -0.0015419, - "model.embeddings.LayerNorm.weight": 1.312345, - "model.embeddings.LayerNorm.bias": -0.0294608, - "model.encoder.layer.0.attention.self.query.weight": -0.0003949, - "model.encoder.layer.0.attention.self.query.bias": 0.0185744, - "model.encoder.layer.0.attention.self.key.weight": 0.0003863, - "model.encoder.layer.0.attention.self.key.bias": 0.0020557, - "model.encoder.layer.0.attention.self.value.weight": 4.22e-05, - "model.encoder.layer.0.attention.self.value.bias": 0.0065417, - 
"model.encoder.layer.0.attention.output.dense.weight": 3.01e-05, - "model.encoder.layer.0.attention.output.dense.bias": 0.0007209, - "model.encoder.layer.0.attention.output.LayerNorm.weight": 1.199831, - "model.encoder.layer.0.attention.output.LayerNorm.bias": 0.0608714, - "model.encoder.layer.0.intermediate.dense.weight": -0.0011731, - "model.encoder.layer.0.intermediate.dense.bias": -0.1219958, - "model.encoder.layer.0.output.dense.weight": -0.0002212, - "model.encoder.layer.0.output.dense.bias": -0.0013031, - "model.encoder.layer.0.output.LayerNorm.weight": 1.2419648, - "model.encoder.layer.0.output.LayerNorm.bias": 0.005295, - "model.encoder.layer.1.attention.self.query.weight": -0.0007321, - "model.encoder.layer.1.attention.self.query.bias": -0.0358397, - "model.encoder.layer.1.attention.self.key.weight": 0.0001333, - "model.encoder.layer.1.attention.self.key.bias": 0.0045062, - "model.encoder.layer.1.attention.self.value.weight": 0.0001012, - "model.encoder.layer.1.attention.self.value.bias": -0.0007094, - "model.encoder.layer.1.attention.output.dense.weight": -2.43e-05, - "model.encoder.layer.1.attention.output.dense.bias": 0.0041446, - "model.encoder.layer.1.attention.output.LayerNorm.weight": 1.0377343, - "model.encoder.layer.1.attention.output.LayerNorm.bias": 0.0443237, - "model.encoder.layer.1.intermediate.dense.weight": -0.001344, - "model.encoder.layer.1.intermediate.dense.bias": -0.1247257, - "model.encoder.layer.1.output.dense.weight": -5.32e-05, - "model.encoder.layer.1.output.dense.bias": 0.000677, - "model.encoder.layer.1.output.LayerNorm.weight": 1.017162, - "model.encoder.layer.1.output.LayerNorm.bias": -0.0474442, - "model.pooler.dense.weight": 0.0001295, - "model.pooler.dense.bias": -0.0052078, - "seq2seq_encoder.weight": -0.0015382, - "seq2seq_encoder.bias": -0.0105704, - "classifier.weight": 0.0261459, - "classifier.bias": -0.0157966, - } - if config.get("use_crf", True): - parameter_means_expected.update( - { - "crf.start_transitions": 
-0.0341042, - "crf.end_transitions": 0.0140624, - "crf.transitions": 0.0056733, - } - ) - assert parameter_means == parameter_means_expected - - -def test_model_pickleable(model): - import pickle - - pickle.dumps(model) - - -def test_freeze_base_model(): - model = TokenClassificationModelWithSeq2SeqEncoderAndCrf( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=5, - freeze_base_model=True, - ) - - base_model_params = dict(model.model.named_parameters(prefix="model")) - assert len(base_model_params) > 0 - for param in base_model_params.values(): - assert not param.requires_grad - task_params = { - name: param for name, param in model.named_parameters() if name not in base_model_params - } - assert len(task_params) > 0 - for param in task_params.values(): - assert param.requires_grad - - -def test_tune_base_model(): - model = TokenClassificationModelWithSeq2SeqEncoderAndCrf( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=5, - ) - base_model_params = dict(model.model.named_parameters(prefix="model")) - assert len(base_model_params) > 0 - for param in base_model_params.values(): - assert param.requires_grad - task_params = { - name: param for name, param in model.named_parameters() if name not in base_model_params - } - assert len(task_params) > 0 - for param in task_params.values(): - assert param.requires_grad - - -def test_forward(batch, model): - inputs, targets = batch - batch_size, seq_len = inputs["input_ids"].shape - num_classes = int(torch.max(targets["labels"]) + 1) - - # set seed to make sure the output is deterministic - torch.manual_seed(42) - output = model.forward(inputs) - assert set(output) == {"logits"} - logits = output["logits"] - assert logits.shape == (batch_size, seq_len, num_classes) - # check the first batch entry - torch.testing.assert_close( - logits[0], - torch.tensor( - [ - [ - -1.065280795097351, - 0.22260898351669312, - -0.013371739536523819, - 1.0213487148284912, - -0.08737741410732269, - ], - [ - -1.092915415763855, 
- 0.07986105978488922, - 0.011286348104476929, - 0.7147902250289917, - -0.014343257993459702, - ], - [ - -1.0107779502868652, - 0.2041827142238617, - -0.06531291455030441, - 0.6551182270050049, - 0.04944971576333046, - ], - [ - -0.3324984312057495, - 0.27757787704467773, - 0.13295423984527588, - 0.26407280564308167, - -0.007371138781309128, - ], - [ - -0.6176304817199707, - 0.12915551662445068, - 0.268213152885437, - 0.43618908524513245, - -0.13303528726100922, - ], - [ - -0.5220450758934021, - 0.37291139364242554, - 0.2522115111351013, - 0.7383102178573608, - 0.1278681606054306, - ], - [ - -1.0737248659133911, - 0.0029090046882629395, - 0.06924695521593094, - 0.6680881977081299, - -0.15523286163806915, - ], - [ - -0.5176048278808594, - -0.01018303632736206, - 0.14543311297893524, - 0.5191693305969238, - -0.3461107611656189, - ], - [ - -0.9277648329734802, - 0.3154565095901489, - -0.07648143172264099, - 0.4210910201072693, - 0.2663896083831787, - ], - [ - -0.8864655494689941, - 0.2862459421157837, - -0.04168111830949783, - 0.4992614984512329, - 0.28455498814582825, - ], - [ - -0.9500657916069031, - 0.1869449019432068, - -0.005329027771949768, - 0.5908203721046448, - 0.06730394065380096, - ], - [ - -0.5336291193962097, - -0.053214408457279205, - 0.22038350999355316, - 0.48135989904403687, - -0.4338146448135376, - ], - ] - ), - ) - - # check the sums per sequence - torch.testing.assert_close( - logits.sum(1), - torch.tensor( - [ - [ - -9.530403137207031, - 2.0144565105438232, - 0.8975526690483093, - 7.009620189666748, - -0.3817189633846283, - ], - [ - -4.351415634155273, - 0.3694552183151245, - -0.8337129354476929, - 3.612205743789673, - 0.15454095602035522, - ], - [ - -6.173098564147949, - -2.6261491775512695, - 0.47521746158599854, - 3.344158172607422, - -5.086399078369141, - ], - [ - -9.28173542022705, - -1.6196215152740479, - 0.18393829464912415, - 5.492751121520996, - -4.148656845092773, - ], - ] - ), - ) - - -def test_step(batch, model, config): - 
torch.manual_seed(42) - loss = model._step("train", batch) - assert loss is not None - if config == {}: - torch.testing.assert_close(loss, torch.tensor(75.52511596679688)) - elif config == {"use_crf": False}: - torch.testing.assert_close(loss, torch.tensor(1.9434731006622314)) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_training_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(TRAINING, batch_idx=0) - assert metric is None - loss = model.training_step(batch, batch_idx=0) - assert loss is not None - if config == {}: - torch.testing.assert_close(loss, torch.tensor(77.59623718261719)) - elif config == {"use_crf": False}: - torch.testing.assert_close(loss, torch.tensor(1.9865683317184448)) - else: - raise ValueError(f"Unknown config: {config}") - - model.on_train_epoch_end() - - -def test_training_step_without_attention_mask(batch, model, config): - inputs, targets = batch - inputs_without_attention_mask = {k: v for k, v in inputs.items() if k != "attention_mask"} - loss = model.training_step(batch=(inputs_without_attention_mask, targets), batch_idx=0) - assert loss is not None - if config == {}: - torch.testing.assert_close(loss, torch.tensor(103.0061264038086)) - elif config == {"use_crf": False}: - torch.testing.assert_close(loss, torch.tensor(1.9988830089569092)) - else: - raise ValueError(f"Unknown config: {config}") - - -def test_validation_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(VALIDATION, batch_idx=0) - metric.reset() - loss = model.validation_step(batch, batch_idx=0) - assert loss is not None - metric_values = {k: v.item() for k, v in metric.compute().items()} - if config == {}: - torch.testing.assert_close(loss, torch.tensor(77.59623718261719)) - assert metric_values == { - "token/macro/f1": 0.20666667819023132, - "token/micro/f1": 0.2068965584039688, - "token/macro/precision": 0.29019609093666077, - "token/macro/recall": 0.2666666805744171, - "token/micro/precision": 
0.2068965584039688, - "token/micro/recall": 0.2068965584039688, - "span/ORG/f1": 0.3636363744735718, - "span/ORG/recall": 0.25, - "span/ORG/precision": 0.6666666865348816, - "span/PER/f1": 0.0, - "span/PER/recall": 0.0, - "span/PER/precision": 0.0, - "span/micro/f1": 0.12121212482452393, - "span/micro/recall": 0.07407407462596893, - "span/micro/precision": 0.3333333432674408, - "span/macro/f1": 0.1818181872367859, - "span/macro/recall": 0.125, - "span/macro/precision": 0.3333333432674408, - } - elif config == {"use_crf": False}: - torch.testing.assert_close(loss, torch.tensor(1.9865683317184448)) - assert metric_values == { - "token/macro/f1": 0.11717171967029572, - "token/micro/f1": 0.17241379618644714, - "token/macro/precision": 0.22500000894069672, - "token/macro/recall": 0.24444444477558136, - "token/micro/precision": 0.17241379618644714, - "token/micro/recall": 0.17241379618644714, - "span/ORG/f1": 0.0, - "span/ORG/recall": 0.0, - "span/ORG/precision": 0.0, - "span/PER/f1": 0.0, - "span/PER/recall": 0.0, - "span/PER/precision": 0.0, - "span/micro/f1": 0.0, - "span/micro/recall": 0.0, - "span/micro/precision": 0.0, - "span/macro/f1": 0.0, - "span/macro/recall": 0.0, - "span/macro/precision": 0.0, - } - else: - raise ValueError(f"Unknown config: {config}") - - model.on_validation_epoch_end() - - -def test_test_step_and_on_epoch_end(batch, model, config): - metric = model._get_metric(TESTING, batch_idx=0) - metric.reset() - loss = model.test_step(batch, batch_idx=0) - assert loss is not None - metric_values = {k: v.item() for k, v in metric.compute().items()} - if config == {}: - torch.testing.assert_close(loss, torch.tensor(77.59623718261719)) - assert metric_values == { - "token/macro/f1": 0.20666667819023132, - "token/micro/f1": 0.2068965584039688, - "token/macro/precision": 0.29019609093666077, - "token/macro/recall": 0.2666666805744171, - "token/micro/precision": 0.2068965584039688, - "token/micro/recall": 0.2068965584039688, - "span/ORG/f1": 
0.3636363744735718, - "span/ORG/recall": 0.25, - "span/ORG/precision": 0.6666666865348816, - "span/PER/f1": 0.0, - "span/PER/recall": 0.0, - "span/PER/precision": 0.0, - "span/micro/f1": 0.12121212482452393, - "span/micro/recall": 0.07407407462596893, - "span/micro/precision": 0.3333333432674408, - "span/macro/f1": 0.1818181872367859, - "span/macro/recall": 0.125, - "span/macro/precision": 0.3333333432674408, - } - elif config == {"use_crf": False}: - torch.testing.assert_close(loss, torch.tensor(1.9865683317184448)) - assert metric_values == { - "token/macro/f1": 0.11717171967029572, - "token/micro/f1": 0.17241379618644714, - "token/macro/precision": 0.22500000894069672, - "token/macro/recall": 0.24444444477558136, - "token/micro/precision": 0.17241379618644714, - "token/micro/recall": 0.17241379618644714, - "span/ORG/f1": 0.0, - "span/ORG/recall": 0.0, - "span/ORG/precision": 0.0, - "span/PER/f1": 0.0, - "span/PER/recall": 0.0, - "span/PER/precision": 0.0, - "span/micro/f1": 0.0, - "span/micro/recall": 0.0, - "span/micro/precision": 0.0, - "span/macro/f1": 0.0, - "span/macro/recall": 0.0, - "span/macro/precision": 0.0, - } - else: - raise ValueError(f"Unknown config: {config}") - - model.on_test_epoch_end() - - -@pytest.mark.parametrize("test_step", [False, True]) -def test_predict_and_predict_step(model, batch, config, test_step): - torch.manual_seed(42) - if test_step: - predictions = model.predict_step(batch, batch_idx=0, dataloader_idx=0) - else: - predictions = model.predict(batch[0]) - - assert set(predictions) == {"labels", "probabilities"} - labels = predictions["labels"] - probabilities = predictions["probabilities"] - if config == {}: - torch.testing.assert_close( - labels, - torch.tensor( - [ - [-100, 3, 3, 1, 3, -100, -100, -100, -100, -100, -100, -100], - [-100, 3, 1, 4, 4, 3, 3, 3, 2, -100, -100, -100], - [-100, 3, 2, 2, 3, 3, 3, 2, -100, -100, -100, -100], - [-100, 3, 3, 3, 2, 3, 1, 4, 3, 3, 2, -100], - ] - ), - ) - elif config == {"use_crf": 
False}: - torch.testing.assert_close( - labels, - torch.tensor( - [ - [-100, 3, 3, 1, 3, -100, -100, -100, -100, -100, -100, -100], - [-100, 3, 3, 4, 4, 3, 3, 3, 3, -100, -100, -100], - [-100, 3, 2, 2, 3, 3, 3, 2, -100, -100, -100, -100], - [-100, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -100], - ] - ), - ) - else: - raise ValueError(f"Unknown config: {config}") - - assert labels.shape == batch[1]["labels"].shape - torch.testing.assert_close( - probabilities[:2].round(decimals=4), - torch.tensor( - [ - [ - [0.0549, 0.1991, 0.1573, 0.4426, 0.1461], - [0.0614, 0.1984, 0.1853, 0.3744, 0.1806], - [0.0661, 0.2229, 0.1702, 0.3499, 0.1909], - [0.1310, 0.2411, 0.2087, 0.2379, 0.1813], - [0.0997, 0.2104, 0.2418, 0.2861, 0.1619], - [0.0904, 0.2213, 0.1961, 0.3189, 0.1732], - [0.0654, 0.1920, 0.2052, 0.3734, 0.1639], - [0.1162, 0.1929, 0.2254, 0.3276, 0.1379], - [0.0716, 0.2483, 0.1678, 0.2759, 0.2364], - [0.0726, 0.2344, 0.1689, 0.2901, 0.2340], - [0.0708, 0.2207, 0.1821, 0.3305, 0.1958], - [0.1162, 0.1879, 0.2470, 0.3206, 0.1284], - ], - [ - [0.1242, 0.1911, 0.1516, 0.3256, 0.2075], - [0.1291, 0.2089, 0.2046, 0.2890, 0.1684], - [0.2033, 0.2016, 0.1920, 0.2260, 0.1771], - [0.1793, 0.2191, 0.1800, 0.1889, 0.2328], - [0.1854, 0.2150, 0.1638, 0.1898, 0.2460], - [0.1363, 0.2007, 0.1738, 0.2887, 0.2005], - [0.1254, 0.2014, 0.1826, 0.2890, 0.2016], - [0.1305, 0.2056, 0.2056, 0.2590, 0.1993], - [0.1400, 0.2022, 0.2252, 0.2544, 0.1783], - [0.1299, 0.2051, 0.1933, 0.2751, 0.1966], - [0.1088, 0.2086, 0.1599, 0.2861, 0.2367], - [0.0910, 0.1794, 0.1840, 0.3793, 0.1663], - ], - ] - ), - ) - - -def test_configure_optimizers(model): - model.trainer = Trainer(max_epochs=10) - optimizer_and_schedular = model.configure_optimizers() - assert optimizer_and_schedular is not None - optimizers, schedulers = optimizer_and_schedular - - assert len(optimizers) == 1 - optimizer = optimizers[0] - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults["lr"] == 1e-05 - assert 
optimizer.defaults["weight_decay"] == 0.01 - assert optimizer.defaults["eps"] == 1e-08 - - assert len(schedulers) == 1 - scheduler = schedulers[0] - assert isinstance(scheduler["scheduler"], torch.optim.lr_scheduler.LambdaLR) - - -def test_configure_optimizers_with_task_learning_rate(): - model = TokenClassificationModelWithSeq2SeqEncoderAndCrf( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=5, - warmup_proportion=0.0, - task_learning_rate=1e-4, - ) - optimizer = model.configure_optimizers() - assert optimizer is not None - assert isinstance(optimizer, torch.optim.AdamW) - assert len(optimizer.param_groups) == 2 - # check that all parameters are in the optimizer - assert set(optimizer.param_groups[0]["params"]) | set( - optimizer.param_groups[1]["params"] - ) == set(model.parameters()) - - # base model parameters - param_group = optimizer.param_groups[0] - assert param_group["lr"] == 1e-05 - assert len(param_group["params"]) == 39 - - # task parameters - param_group = optimizer.param_groups[1] - assert param_group["lr"] == 1e-04 - assert len(param_group["params"]) == 5 diff --git a/tests/taskmodules/__init__.py b/tests/taskmodules/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/taskmodules/common/__init__.py b/tests/taskmodules/common/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/taskmodules/common/test_interfaces.py b/tests/taskmodules/common/test_interfaces.py deleted file mode 100644 index 5c65c797a..000000000 --- a/tests/taskmodules/common/test_interfaces.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Any, Dict, List, Set, Tuple - -from pie_modules.annotations import Span -from pie_modules.taskmodules.common import AnnotationEncoderDecoder - - -def test_annotation_encoder_decoder(): - """Test the AnnotationEncoderDecoder class.""" - - class SpanAnnotationEncoderDecoder(AnnotationEncoderDecoder[Span, Tuple[int, int]]): - """A class that uses the AnnotationEncoderDecoder 
class.""" - - def encode(self, annotation: Span, **kwargs) -> Tuple[int, int]: - return annotation.start, annotation.end - - def decode(self, encoding: Tuple[int, int], **kwargs) -> Span: - return Span(start=encoding[0], end=encoding[1]) - - def validate_encoding(self, encoding: Tuple[int, int]) -> Set[str]: - return {"order"} if encoding[0] > encoding[1] else set() - - encoder_decoder = SpanAnnotationEncoderDecoder() - - assert encoder_decoder.encode(Span(start=1, end=2)) == (1, 2) - assert encoder_decoder.decode((1, 2)) == Span(start=1, end=2) - assert encoder_decoder.validate_encoding((1, 2)) == set() - assert encoder_decoder.validate_encoding((2, 1)) == {"order"} diff --git a/tests/taskmodules/common/test_mixins.py b/tests/taskmodules/common/test_mixins.py deleted file mode 100644 index b6b921bb2..000000000 --- a/tests/taskmodules/common/test_mixins.py +++ /dev/null @@ -1,166 +0,0 @@ -import dataclasses -import logging -from typing import List - -import torch -from pie_core import Annotation - -from pie_modules.taskmodules.common import BatchableMixin -from pie_modules.taskmodules.common.mixins import RelationStatisticsMixin - - -def test_batchable_mixin(): - """Test the BatchableMixin class.""" - - @dataclasses.dataclass - class Foo(BatchableMixin): - """A class that uses the BatchableMixin class.""" - - a: List[int] - - @property - def len_a(self): - """Return the length of the list a.""" - return len(self.a) - - x = Foo(a=[1, 2, 3]) - y = Foo(a=[4, 5]) - - batch = Foo.batch( - values=[x, y], dtypes={"a": torch.int64, "len_a": torch.int64}, pad_values={"a": 0} - ) - torch.testing.assert_close(batch["a"], torch.tensor([[1, 2, 3], [4, 5, 0]])) - torch.testing.assert_close(batch["len_a"], torch.tensor([3, 2])) - - -def test_relation_statistics_mixin_show_statistics(caplog): - """Test the RelationStatisticsMixin class.""" - - class Foo(RelationStatisticsMixin): - """A class that uses the RelationStatisticsMixin class.""" - - pass - - 
@dataclasses.dataclass(eq=True, frozen=True) - class TestAnnotation(Annotation): - label: str - score: float = dataclasses.field(default=1.0, compare=False) - - x = Foo(collect_statistics=True) - - relations = [ - TestAnnotation(label="A", score=1.0), - TestAnnotation(label="B", score=0.5), - TestAnnotation(label="C", score=0.0), - TestAnnotation(label="D", score=0.3), - ] - # all available relations - x.collect_all_relations(kind="available", relations=relations) - # relations skipped for a reason ("test") - x.collect_relation(kind="skipped_test", relation=relations[1]) - # mark two relations as used, one of them is skipped for another (unknown) reason - x.collect_all_relations(kind="used", relations=[relations[0], relations[2]]) - - statistics = x.get_statistics() - - assert statistics == { - ("available", "A"): 1, - ("available", "B"): 1, - ("available", "D"): 1, - ("available", "no_relation"): 1, - ("skipped_other", "D"): 1, - ("skipped_test", "B"): 1, - ("used", "A"): 1, - ("used", "no_relation"): 1, - } - - with caplog.at_level(logging.INFO): - x.show_statistics() - assert caplog.messages[0] == ( - "Foo does not have a `none_label` attribute. " - "Using default value 'no_relation'. " - "`none_label` is used as the label for relations with score 0 in statistics and " - "all relations with label different from `none_label` will be summarized to 'all_relations'. " - "Set the `none_label` attribute before using statistics or " - "overwrite `get_none_label_for_statistics()` function to get rid of this message." 
- ) - assert caplog.messages[1] == ( - "statistics:\n" - "| | available | skipped_other | skipped_test | used | used % |\n" - "|:--------------|------------:|----------------:|---------------:|-------:|---------:|\n" - "| A | 1 | 0 | 0 | 1 | 100 |\n" - "| B | 1 | 0 | 1 | 0 | 0 |\n" - "| D | 1 | 1 | 0 | 0 | 0 |\n" - "| no_relation | 1 | 0 | 0 | 1 | 100 |\n" - "| all_relations | 3 | 1 | 1 | 1 | 33 |" - ) - - -def test_relation_statistics_mixin_show_statistics_no_relations(caplog): - """Test the RelationStatisticsMixin class with no predictions.""" - - class Foo(RelationStatisticsMixin): - """A class that uses the RelationStatisticsMixin class.""" - - pass - - x = Foo(collect_statistics=True) - - # Test with no relations collected - x.collect_all_relations(kind="available", relations=[]) - x.collect_all_relations(kind="used", relations=[]) - - statistics = x.get_statistics() - - assert statistics == {} - - with caplog.at_level(logging.INFO): - x.show_statistics() - assert caplog.messages[0] == "statistics:\n" "|--:|\n" "| 0 |" - - -def test_relation_statistics_mixin_show_statistics_custom_none_label(caplog): - """Test the RelationStatisticsMixin class with custom none_label.""" - - class Foo(RelationStatisticsMixin): - """A class that uses the RelationStatisticsMixin class. - - It also sets the `none_label` attribute which will be used by statistics. 
- """ - - def __init__(self, none_label: str = "no_relation", **kwargs): - super().__init__(**kwargs) - self.none_label = none_label - - @dataclasses.dataclass(eq=True, frozen=True) - class TestAnnotation(Annotation): - label: str - score: float = dataclasses.field(default=1.0, compare=False) - - x = Foo(collect_statistics=True, none_label="None_Label") - - relations = [ - TestAnnotation(label="A", score=1.0), - TestAnnotation(label="B", score=0.5), - TestAnnotation(label="C", score=0.0), - TestAnnotation(label="D", score=0.3), - ] - # all available relations - x.collect_all_relations(kind="available", relations=relations) - # relations skipped for a reason ("test") - x.collect_relation(kind="skipped_test", relation=relations[1]) - # mark two relations as used, one of them is skipped for another (unknown) reason - x.collect_all_relations(kind="used", relations=[relations[0], relations[2]]) - - with caplog.at_level(logging.INFO): - x.show_statistics() - assert caplog.messages[0] == ( - "statistics:\n" - "| | available | skipped_other | skipped_test | used | used % |\n" - "|:--------------|------------:|----------------:|---------------:|-------:|---------:|\n" - "| A | 1 | 0 | 0 | 1 | 100 |\n" - "| B | 1 | 0 | 1 | 0 | 0 |\n" - "| D | 1 | 1 | 0 | 0 | 0 |\n" - "| None_Label | 1 | 0 | 0 | 1 | 100 |\n" - "| all_relations | 3 | 1 | 1 | 1 | 33 |" - ) diff --git a/tests/taskmodules/common/test_taskmodule_with_document_converter.py b/tests/taskmodules/common/test_taskmodule_with_document_converter.py deleted file mode 100644 index 8540b5a3f..000000000 --- a/tests/taskmodules/common/test_taskmodule_with_document_converter.py +++ /dev/null @@ -1,163 +0,0 @@ -from typing import Optional, Type - -import pytest -from pie_core import Document -from typing_extensions import TypeAlias - -from pie_modules.documents import TextDocumentWithLabeledSpansAndBinaryRelations -from pie_modules.taskmodules import RETextClassificationWithIndicesTaskModule -from pie_modules.taskmodules.common 
import TaskModuleWithDocumentConverter -from tests.conftest import TestDocument - -DocumentType: TypeAlias = TestDocument -ConvertedDocumentType: TypeAlias = TextDocumentWithLabeledSpansAndBinaryRelations - - -class MyRETaskModuleWithDocConverter( - TaskModuleWithDocumentConverter, RETextClassificationWithIndicesTaskModule -): - @property - def document_type(self) -> Optional[Type[Document]]: - return TestDocument - - def _convert_document(self, document: DocumentType) -> ConvertedDocumentType: - result = document.as_type( - TextDocumentWithLabeledSpansAndBinaryRelations, - field_mapping={"entities": "labeled_spans", "relations": "binary_relations"}, - ) - new2old_span = { - new_s: old_s for old_s, new_s in zip(document.entities, result.labeled_spans) - } - result.metadata["new2old_span"] = new2old_span - return result - - def _integrate_predictions_from_converted_document( - self, document: DocumentType, converted_document: ConvertedDocumentType - ) -> None: - new2old_span = converted_document.metadata["new2old_span"] - for rel in converted_document.binary_relations.predictions: - new_rel = rel.copy(head=new2old_span[rel.head], tail=new2old_span[rel.tail]) - document.relations.predictions.append(new_rel) - - -@pytest.fixture(scope="module") -def taskmodule(documents): - result = MyRETaskModuleWithDocConverter(tokenizer_name_or_path="bert-base-cased") - result.prepare(documents) - return result - - -def test_taskmodule(taskmodule): - assert taskmodule is not None - assert taskmodule.document_type == TestDocument - - -@pytest.fixture(scope="module") -def task_encodings(taskmodule, documents): - return taskmodule.encode(documents, encode_target=True) - - -def test_task_encodings(task_encodings): - assert len(task_encodings) == 7 - - -def test_decode(taskmodule, task_encodings): - label_indices = [0, 1, 3, 0, 0, 2, 0] - probabilities = [0.1738, 0.6643, 0.2101, 0.0801, 0.0319, 0.81, 0.3079] - task_outputs = [ - {"labels": [taskmodule.id_to_label[label_idx]], 
"probabilities": [prob]} - for label_idx, prob in zip(label_indices, probabilities) - ] - docs_with_predictions = taskmodule.decode( - task_encodings=task_encodings, task_outputs=task_outputs - ) - assert all(isinstance(doc, TestDocument) for doc in docs_with_predictions) - - all_gold_relations = [doc.relations.resolve() for doc in docs_with_predictions] - assert all_gold_relations == [ - [("per:employee_of", (("PER", "Entity A"), ("ORG", "B")))], - [ - ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), - ("per:founder", (("PER", "Entity G"), ("ORG", "I"))), - ("org:founded_by", (("ORG", "I"), ("ORG", "H"))), - ], - [ - ("per:employee_of", (("PER", "Entity M"), ("ORG", "N"))), - ("per:founder", (("PER", "it"), ("ORG", "O"))), - ("org:founded_by", (("ORG", "O"), ("PER", "it"))), - ], - ] - - all_predicted_relations = [ - doc.relations.predictions.resolve() for doc in docs_with_predictions - ] - assert all_predicted_relations == [ - [("no_relation", (("PER", "Entity A"), ("ORG", "B")))], - [ - ("org:founded_by", (("PER", "Entity G"), ("ORG", "H"))), - ("per:founder", (("PER", "Entity G"), ("ORG", "I"))), - ("no_relation", (("ORG", "I"), ("ORG", "H"))), - ], - [ - ("no_relation", (("PER", "Entity M"), ("ORG", "N"))), - ("per:employee_of", (("PER", "it"), ("ORG", "O"))), - ("no_relation", (("ORG", "O"), ("PER", "it"))), - ], - ] - - -class MyRETaskModuleWithDocConverterWithoutDocType( - TaskModuleWithDocumentConverter, RETextClassificationWithIndicesTaskModule -): - def _convert_document(self, document: DocumentType) -> ConvertedDocumentType: - pass - - def _integrate_predictions_from_converted_document( - self, document: DocumentType, converted_document: ConvertedDocumentType - ) -> None: - pass - - -def test_missing_document_type_overwrite(): - taskmodule = MyRETaskModuleWithDocConverterWithoutDocType( - tokenizer_name_or_path="bert-base-cased" - ) - - with pytest.raises(NotImplementedError) as e: - taskmodule.document_type - assert ( - str(e.value) - == 
"please overwrite document_type for MyRETaskModuleWithDocConverterWithoutDocType" - ) - - -class MyRETaskModuleWithWrongDocConverter( - TaskModuleWithDocumentConverter, RETextClassificationWithIndicesTaskModule -): - @property - def document_type(self) -> Optional[Type[Document]]: - return TestDocument - - def _convert_document(self, document: DocumentType) -> ConvertedDocumentType: - result = TextDocumentWithLabeledSpansAndBinaryRelations(text="dummy") - result.metadata["original_document"] = None - return result - - def _integrate_predictions_from_converted_document( - self, document: DocumentType, converted_document: ConvertedDocumentType - ) -> None: - pass - - -def test_wrong_doc_converter(documents): - taskmodule = MyRETaskModuleWithWrongDocConverter(tokenizer_name_or_path="bert-base-cased") - taskmodule.prepare(documents) - with pytest.raises(ValueError) as e: - taskmodule.encode(documents, encode_target=True) - assert ( - str(e.value) - == "metadata of converted_document has already and entry 'original_document', " - "this is not allowed. Please adjust " - "'MyRETaskModuleWithWrongDocConverter._convert_document()' to produce " - "documents without that key in metadata." 
- ) diff --git a/tests/taskmodules/common/test_utils.py b/tests/taskmodules/common/test_utils.py deleted file mode 100644 index b9a426402..000000000 --- a/tests/taskmodules/common/test_utils.py +++ /dev/null @@ -1,17 +0,0 @@ -import torch - -from pie_modules.taskmodules.common.utils import get_first_occurrence_index - - -def test_get_first_occurrence_index(): - tensor: torch.LongTensor = torch.tensor( - [ - [0, 1, 1, 1, 1, 1], # 1 - [0, 0, 1, 1, 1, 1], # 2 - [0, 1, 1, 0, 0, 1], # 1 - [1, 1, 1, 1, 1, 1], # 0 - [0, 0, 0, 0, 0, 0], # 6 (=size of input) because no 1s at all - ] - ).to(torch.long) - indices = get_first_occurrence_index(tensor, 1) - torch.testing.assert_close(indices, torch.tensor([1, 2, 1, 0, 6])) diff --git a/tests/taskmodules/metrics/__init__.py b/tests/taskmodules/metrics/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/taskmodules/metrics/test_precision_recall_and_f1_for_labeled_annotations.py b/tests/taskmodules/metrics/test_precision_recall_and_f1_for_labeled_annotations.py deleted file mode 100644 index 90cc93e59..000000000 --- a/tests/taskmodules/metrics/test_precision_recall_and_f1_for_labeled_annotations.py +++ /dev/null @@ -1,151 +0,0 @@ -import pytest -from torch import tensor - -from pie_modules.annotations import LabeledSpan -from pie_modules.taskmodules.metrics import PrecisionRecallAndF1ForLabeledAnnotations - - -def test_precision_recall_and_f1_for_labeled_annotations(): - metric = PrecisionRecallAndF1ForLabeledAnnotations() - assert metric.metric_state == {} - - metric.update( - gold=[LabeledSpan(start=0, end=1, label="a")], - predicted=[LabeledSpan(start=0, end=1, label="a")], - ) - metric_state = {k: v.tolist() for k, v in metric.metric_state.items()} - assert metric_state == {"counts_a": [1, 1, 1], "counts_micro": [1, 1, 1]} - value = metric.compute() - assert value == { - "a": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "macro": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "micro": 
{"recall": 1.0, "precision": 1.0, "f1": 1.0}, - } - - metric.reset() - metric.update( - gold=[LabeledSpan(start=0, end=1, label="a"), LabeledSpan(start=0, end=1, label="b")], - predicted=[LabeledSpan(start=0, end=1, label="b"), LabeledSpan(start=0, end=1, label="c")], - ) - metric_state = {k: v.tolist() for k, v in metric.metric_state.items()} - assert metric_state == { - "counts_a": [1, 0, 0], - "counts_b": [1, 1, 1], - "counts_c": [0, 1, 0], - "counts_micro": [2, 2, 1], - } - assert metric.compute() == { - "b": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "a": {"recall": 0.0, "precision": 0.0, "f1": 0.0}, - "c": {"recall": 0.0, "precision": 0.0, "f1": 0.0}, - "macro": { - "f1": tensor(0.3333333432674408), - "precision": tensor(0.3333333432674408), - "recall": tensor(0.3333333432674408), - }, - "micro": {"recall": 0.5, "precision": 0.5, "f1": 0.5}, - } - - # check deduplication in same update - metric.reset() - metric.update( - gold=[ - LabeledSpan(start=0, end=1, label="a"), - LabeledSpan(start=0, end=1, label="a"), - LabeledSpan(start=0, end=1, label="b"), - ], - predicted=[ - LabeledSpan(start=0, end=1, label="b"), - LabeledSpan(start=0, end=1, label="b"), - LabeledSpan(start=0, end=1, label="c"), - ], - ) - metric_state = {k: v.tolist() for k, v in metric.metric_state.items()} - assert metric_state == { - "counts_a": [1, 0, 0], - "counts_b": [1, 1, 1], - "counts_c": [0, 1, 0], - "counts_micro": [2, 2, 1], - } - - # assert no deduplication over multiple updates - metric.reset() - metric.update( - gold=[LabeledSpan(start=0, end=1, label="a")], - predicted=[LabeledSpan(start=0, end=1, label="b")], - ) - metric.update( - gold=[LabeledSpan(start=0, end=1, label="b")], - predicted=[LabeledSpan(start=0, end=1, label="a")], - ) - metric_state = {k: v.tolist() for k, v in metric.metric_state.items()} - assert metric_state == { - "counts_a": [1, 1, 0], - "counts_b": [1, 1, 0], - "counts_c": [0, 0, 0], - "counts_micro": [2, 2, 0], - } - assert metric.compute() == { - 
"a": {"f1": 0.0, "precision": 0.0, "recall": 0.0}, - "b": {"f1": 0.0, "precision": 0.0, "recall": 0.0}, - "c": {"f1": 0.0, "precision": 0.0, "recall": 0.0}, - "macro": {"f1": 0.0, "precision": 0.0, "recall": 0.0}, - "micro": {"f1": 0.0, "precision": 0.0, "recall": 0.0}, - } - - -def test_precision_recall_and_f1_for_labeled_annotations_in_percent(): - metric = PrecisionRecallAndF1ForLabeledAnnotations( - in_percent=True, flatten_result_with_sep="/" - ) - - metric.update( - gold=[LabeledSpan(start=0, end=1, label="a")], - predicted=[LabeledSpan(start=0, end=1, label="a"), LabeledSpan(start=0, end=1, label="b")], - ) - values = {k: v.item() for k, v in metric.compute().items()} - assert values == { - "a/f1": 100.0, - "a/precision": 100.0, - "a/recall": 100.0, - "b/f1": 0.0, - "b/precision": 0.0, - "b/recall": 0.0, - "macro/f1": 50.0, - "macro/precision": 50.0, - "macro/recall": 50.0, - "micro/f1": 66.66667175292969, - "micro/precision": 50.0, - "micro/recall": 100.0, - } - - -def test_precision_recall_and_f1_for_labeled_annotations_with_label_mapping(): - metric = PrecisionRecallAndF1ForLabeledAnnotations( - label_mapping={"a": "label_a", "b": "label_b"} - ) - - metric.update( - gold=[LabeledSpan(start=0, end=1, label="a")], - predicted=[LabeledSpan(start=0, end=1, label="a"), LabeledSpan(start=0, end=1, label="b")], - ) - assert metric.compute() == { - "label_a": {"f1": 1.0, "precision": 1.0, "recall": 1.0}, - "label_b": {"f1": 0.0, "precision": 0.0, "recall": 0.0}, - "macro": {"f1": 0.5, "precision": 0.5, "recall": 0.5}, - "micro": {"f1": 0.6666666666666666, "precision": 0.5, "recall": 1.0}, - } - - -def test_precision_recall_and_f1_for_labeled_annotations_key_micro_error(): - metric = PrecisionRecallAndF1ForLabeledAnnotations() - with pytest.raises(ValueError) as excinfo: - metric.update( - gold=[LabeledSpan(start=0, end=1, label="micro")], - predicted=[], - ) - assert ( - str(excinfo.value) - == "The key 'micro' was used as an annotation label, but it is reserved 
for the micro average. " - "You can change which key is used for that with the 'key_micro' argument." - ) diff --git a/tests/taskmodules/metrics/test_wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py b/tests/taskmodules/metrics/test_wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py deleted file mode 100644 index b4e34ceb7..000000000 --- a/tests/taskmodules/metrics/test_wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function.py +++ /dev/null @@ -1,144 +0,0 @@ -import json -from typing import Any, Dict, Tuple - -import pytest -from torch import tensor -from torchmetrics import Metric - -from pie_modules.taskmodules.metrics import ( - WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction, -) - - -class TestMetric(Metric): - """A simple metric that computes the exact match ratio between predictions and targets.""" - - def __init__(self): - super().__init__() - self.add_state("matching", default=[]) - - def update(self, prediction: str, target: str): - self.matching.append(prediction == target) - - def compute(self): - # Note: returning NaN in the case of an empty list would be more correct, but - # returning 0.0 is more convenient for testing. 
- return sum(self.matching) / len(self.matching) if self.matching else 0.0 - - -@pytest.fixture(scope="module") -def wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function(): - def decode_with_errors_function(x: str) -> Tuple[Dict[str, Any], Dict[str, int]]: - if x == "error": - return {"entities": [], "relations": []}, {"dummy": 1} - else: - return json.loads(x), {"dummy": 0} - - layer_metrics = { - "entities": TestMetric(), - "relations": TestMetric(), - } - metric = WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction( - layer_metrics=layer_metrics, - unbatch_function=lambda x: x.split("\n"), - decode_layers_with_errors_function=decode_with_errors_function, - ) - return metric - - -def test_wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function( - wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function, -): - metric = wrapped_layer_metrics_with_unbatch_and_decode_with_errors_function - assert metric is not None - assert metric.unbatch_function is not None - assert metric.decode_layers_with_errors_function is not None - assert metric.layer_metrics is not None - assert metric.metric_state == { - "total": tensor(0), - "exact_encoding_matches": tensor(0), - } - - values = metric.compute() - assert metric.metric_state - assert values == { - "decoding_errors": {"all": 0.0}, - "entities": 0.0, - "exact_encoding_matches": 0.0, - "relations": 0.0, - } - - metric.reset() - # Prediction and expected are the same. 
- metric.update( - prediction=json.dumps({"entities": ["E1"], "relations": ["R1"]}), - expected=json.dumps({"entities": ["E1"], "relations": ["R1"]}), - ) - assert metric.metric_state == { - "total": tensor(1), - "exact_encoding_matches": tensor(1), - "errors_dummy": tensor(0), - } - values = metric.compute() - assert values == { - "decoding_errors": {"all": 0.0, "dummy": 0.0}, - "entities": 1.0, - "exact_encoding_matches": 1.0, - "relations": 1.0, - } - - metric.reset() - # Prediction and expected are different and there are multiple entries. - # The first entry is an exact match, the second entry is not. - metric.update( - prediction=json.dumps({"entities": ["E1"], "relations": ["R1"]}) - + "\n" - + json.dumps({"entities": ["E1"], "relations": ["R1"]}), - expected=json.dumps({"entities": ["E1"], "relations": ["R1"]}) - + "\n" - + json.dumps({"entities": ["E1"], "relations": ["R2"]}), - ) - assert metric.metric_state == { - "total": tensor(2), - "exact_encoding_matches": tensor(1), - "errors_dummy": tensor(0), - } - values = metric.compute() - assert values == { - "decoding_errors": {"all": 0.0, "dummy": 0.0}, - "entities": 1.0, - "exact_encoding_matches": 0.5, - "relations": 0.5, - } - - metric.reset() - # Encoding error - metric.update( - prediction="error", - expected=json.dumps({"entities": ["E1"], "relations": []}), - ) - assert metric.metric_state == { - "total": tensor(1), - "exact_encoding_matches": tensor(0), - "errors_dummy": tensor(1), - } - values = metric.compute() - # In the case on an error, the decoding function returns adict with empty lists for entities and relations. - # Thus, we get a perfect match for entities and a 0.0 match for relations. 
- assert values == { - "decoding_errors": {"all": 1.0, "dummy": 1.0}, - "entities": 0.0, - "exact_encoding_matches": 0.0, - "relations": 1.0, - } - - # test mismatched number of predictions and targets - metric.reset() - with pytest.raises(ValueError) as excinfo: - metric.update( - prediction=json.dumps({"entities": ["E1"], "relations": ["R1"]}), - expected=json.dumps({"entities": ["E1"], "relations": ["R1"]}) - + "\n" - + json.dumps({"entities": ["E1"], "relations": ["R1"]}), - ) - assert str(excinfo.value) == "Number of predictions (1) and targets (2) do not match." diff --git a/tests/taskmodules/metrics/test_wrapped_metric_with_prepare_function.py b/tests/taskmodules/metrics/test_wrapped_metric_with_prepare_function.py deleted file mode 100644 index e06130b09..000000000 --- a/tests/taskmodules/metrics/test_wrapped_metric_with_prepare_function.py +++ /dev/null @@ -1,137 +0,0 @@ -from functools import partial - -import pytest -from torchmetrics import Metric - -from pie_modules.taskmodules.metrics import WrappedMetricWithPrepareFunction - - -class TestMetric(Metric): - """A simple metric that computes the exact match ratio between predictions and targets.""" - - def __init__(self): - super().__init__() - self.add_state("matching", default=[]) - - def update(self, prediction: str, target: str): - self.matching.append(prediction == target) - - def compute(self): - # Note: returning NaN in the case of an empty list would be more correct, but - # returning 0.0 is more convenient for testing. 
- return sum(self.matching) / len(self.matching) if self.matching else 0.0 - - -def test_metric(): - metric = WrappedMetricWithPrepareFunction( - metric=TestMetric(), prepare_function=lambda x: x.split()[0] - ) - - assert metric is not None - assert metric.prepare_function is not None - - assert metric.compute() == 0.0 - - metric.reset() - metric(prediction="abc", target="abc") - assert metric.compute() == 1.0 - - metric.reset() - metric(prediction="abc", target="def") - assert metric.compute() == 0.0 - - metric.reset() - metric(prediction="abc def", target="abc xyz") - # we consider just the first word, so this is still 1.0 - assert metric.compute() == 1.0 - - metric.reset() - metric(prediction="abc def", target="xyz def") - assert metric.compute() == 0.0 - - -def split_both_and_remove_where_both_match( - preds: str, targets: str, match: str -) -> tuple[list[str], list[str]]: - preds = preds.split() - targets = targets.split() - not_both_none_indices = [ - i for i, (p, t) in enumerate(zip(preds, targets)) if p != match or t != match - ] - preds = [preds[i] for i in not_both_none_indices] - targets = [targets[i] for i in not_both_none_indices] - return preds, targets - - -def test_wrapped_metric_with_prepare_both_function(): - metric = WrappedMetricWithPrepareFunction( - metric=TestMetric(), - prepare_together_function=partial(split_both_and_remove_where_both_match, match="none"), - prepare_does_unbatch=True, - ) - - assert metric is not None - assert metric.prepare_both_function is not None - - assert metric.compute() == 0.0 - - # none is removed from both, remaining is the same - metric.reset() - metric(prediction="abc none", target="abc none") - assert metric.compute() == 1.0 - - # none is removed from both, remaining is different - metric.reset() - metric(prediction="abc none", target="def none") - assert metric.compute() == 0.0 - - # none is not removed from both, remaining is partially the same - metric.reset() - metric(prediction="abc def", target="abc 
none") - assert metric.compute() == 0.5 - - # none is not removed from both, remaining is different - metric.reset() - metric(prediction="abc def", target="def none") - assert metric.compute() == 0.0 - - -@pytest.fixture(scope="module") -def wrapped_metric_with_unbatch_function(): - # just split the strings to unbatch the inputs - return WrappedMetricWithPrepareFunction( - metric=TestMetric(), prepare_function=lambda x: x.split(), prepare_does_unbatch=True - ) - - -def test_wrapped_metric_with_unbatch_function(wrapped_metric_with_unbatch_function): - metric = wrapped_metric_with_unbatch_function - assert metric is not None - - assert metric.compute() == 0.0 - - metric.reset() - metric(prediction="abc", target="abc") - assert metric.compute() == 1.0 - - metric.reset() - metric(prediction="abc", target="def") - assert metric.compute() == 0.0 - - metric.reset() - metric(prediction="abc def", target="abc def") - assert metric.compute() == 1.0 - - metric.reset() - metric(prediction="abc def", target="def abc") - assert metric.compute() == 0.0 - - metric.reset() - metric(prediction="abc xyz", target="def xyz") - assert metric.compute() == 0.5 - - -def test_wrapped_metric_with_unbatch_function_size_mismatch(wrapped_metric_with_unbatch_function): - with pytest.raises(ValueError) as excinfo: - wrapped_metric_with_unbatch_function(prediction="abc", target="abc def") - assert str(excinfo.value) == "Number of prepared predictions (1) and targets (2) do not match." 
diff --git a/tests/taskmodules/pointer_network/__init__.py b/tests/taskmodules/pointer_network/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/taskmodules/pointer_network/test_annotation_encoder_decoder.py b/tests/taskmodules/pointer_network/test_annotation_encoder_decoder.py deleted file mode 100644 index 4dbeaab8a..000000000 --- a/tests/taskmodules/pointer_network/test_annotation_encoder_decoder.py +++ /dev/null @@ -1,441 +0,0 @@ -import pytest - -from pie_modules.annotations import BinaryRelation, LabeledSpan, Span -from pie_modules.taskmodules.pointer_network.annotation_encoder_decoder import ( - BinaryRelationEncoderDecoder, - DecodingLabelException, - DecodingLengthException, - DecodingNegativeIndexException, - DecodingOrderException, - LabeledSpanEncoderDecoder, - SpanEncoderDecoder, - SpanEncoderDecoderWithOffset, -) - - -@pytest.mark.parametrize("exclusive_end", [True, False]) -def test_span_encoder_decoder(exclusive_end): - """Test the SimpleSpanEncoderDecoder class.""" - - encoder_decoder = SpanEncoderDecoder(exclusive_end) - if exclusive_end: - assert encoder_decoder.encode(Span(start=1, end=2)) == [1, 2] - assert encoder_decoder.decode([1, 2]) == Span(start=1, end=2) - else: - assert encoder_decoder.encode(Span(start=1, end=2)) == [1, 1] - assert encoder_decoder.decode([1, 1]) == Span(start=1, end=2) - - -def test_span_encoder_decoder_wrong_length(): - """Test the SimpleSpanEncoderDecoder class.""" - - encoder_decoder = SpanEncoderDecoder() - with pytest.raises(DecodingLengthException) as excinfo: - encoder_decoder.decode([1]) - assert ( - str(excinfo.value) - == "two values are required to decode as Span, but encoding has length 1" - ) - assert excinfo.value.identifier == "len" - - with pytest.raises(DecodingLengthException) as excinfo: - encoder_decoder.decode([1, 2, 3]) - assert ( - str(excinfo.value) - == "two values are required to decode as Span, but encoding has length 3" - ) - assert excinfo.value.identifier 
== "len" - - -def test_span_encoder_decoder_wrong_order(): - """Test the SimpleSpanEncoderDecoder class.""" - - encoder_decoder = SpanEncoderDecoder() - - with pytest.raises(DecodingOrderException) as excinfo: - encoder_decoder.decode([3, 2]) - assert ( - str(excinfo.value) - == "end index can not be smaller than start index, but got: start=3, end=2" - ) - assert excinfo.value.identifier == "order" - - # zero-length span - span = encoder_decoder.decode([1, 1]) - assert span is not None - - -def test_span_encoder_decoder_wrong_offset(): - """Test the SimpleSpanEncoderDecoder class.""" - - encoder_decoder = SpanEncoderDecoder() - - with pytest.raises(DecodingNegativeIndexException) as excinfo: - encoder_decoder.decode([-1, 2]) - assert str(excinfo.value) == "indices must be positive, but got: [-1, 2]" - assert excinfo.value.identifier == "index" - - -def test_span_encoder_decoder_with_offset(): - """Test the SpanEncoderDecoderWithOffset class.""" - - encoder_decoder = SpanEncoderDecoderWithOffset(offset=1) - - assert encoder_decoder.encode(Span(start=1, end=2)) == [2, 3] - assert encoder_decoder.decode([2, 3]) == Span(start=1, end=2) - - -@pytest.mark.parametrize("mode", ["indices_label", "label_indices"]) -def test_labeled_span_encoder_decoder(mode): - """Test the LabeledSpanEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1} - encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode=mode, - ) - - if mode == "indices_label": - assert encoder_decoder.encode(LabeledSpan(start=1, end=2, label="A")) == [3, 4, 0] - assert encoder_decoder.decode([3, 4, 0]) == LabeledSpan(start=1, end=2, label="A") - elif mode == "label_indices": - assert encoder_decoder.encode(LabeledSpan(start=1, end=2, label="A")) == [0, 3, 4] - assert encoder_decoder.decode([0, 3, 4]) == LabeledSpan(start=1, end=2, label="A") - else: - raise ValueError(f"unknown mode: {mode}") - - 
-@pytest.mark.parametrize("mode", ["indices_label", "label_indices"]) -def test_labeled_span_encoder_decoder_wrong_label_encoding(mode): - """Test the LabeledSpanEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1} - encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode=mode, - ) - - if mode == "indices_label": - with pytest.raises(DecodingLabelException) as excinfo: - encoder_decoder.decode([2, 3, 4]) - elif mode == "label_indices": - with pytest.raises(DecodingLabelException) as excinfo: - encoder_decoder.decode([4, 2, 3]) - assert str(excinfo.value) == "unknown label id: 4 (label2id: {'A': 0, 'B': 1})" - assert excinfo.value.identifier == "label" - - -def test_labeled_span_encoder_decoder_unknown_mode(): - """Test the LabeledSpanEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1} - encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode="unknown", - ) - with pytest.raises(ValueError) as excinfo: - encoder_decoder.encode(LabeledSpan(start=1, end=2, label="A")) - assert str(excinfo.value) == "unknown mode: unknown" - - with pytest.raises(ValueError) as excinfo: - encoder_decoder.decode([0, 3, 4]) - assert str(excinfo.value) == "unknown mode: unknown" - - -@pytest.mark.parametrize( - "mode", ["head_tail_label", "tail_head_label", "label_head_tail", "label_tail_head"] -) -def test_binary_relation_encoder_decoder(mode): - """Test the BinaryRelationEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1, "C": 2} - labeled_span_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode="indices_label", - ) - encoder_decoder = BinaryRelationEncoderDecoder( - head_encoder_decoder=labeled_span_encoder_decoder, - tail_encoder_decoder=labeled_span_encoder_decoder, - label2id=label2id, - 
mode=mode, - ) - - if mode == "head_tail_label": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - ) == [4, 5, 0, 6, 7, 1, 2] - assert encoder_decoder.decode([4, 5, 0, 6, 7, 1, 2]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - elif mode == "tail_head_label": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - ) == [6, 7, 1, 4, 5, 0, 2] - assert encoder_decoder.decode([6, 7, 1, 4, 5, 0, 2]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - elif mode == "label_head_tail": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - ) == [2, 4, 5, 0, 6, 7, 1] - assert encoder_decoder.decode([2, 4, 5, 0, 6, 7, 1]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - elif mode == "label_tail_head": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - ) == [2, 6, 7, 1, 4, 5, 0] - assert encoder_decoder.decode([2, 6, 7, 1, 4, 5, 0]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=3, end=4, label="B"), - label="C", - ) - - -@pytest.mark.parametrize( - "mode", ["head_tail_label", "tail_head_label", "label_head_tail", "label_tail_head"] -) -def test_binary_relation_encoder_decoder_loop_relation(mode): - """Test the BinaryRelationEncoderDecoder class.""" - - # we use different label2id for head and tail to test the case where the head and tail - # have 
different label sets - head_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=3), - label2id={"A": 1, "B": 2}, - mode="indices_label", - ) - tail_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=3), - label2id={"A": -1, "B": -2}, - mode="indices_label", - ) - encoder_decoder = BinaryRelationEncoderDecoder( - head_encoder_decoder=head_encoder_decoder, - tail_encoder_decoder=tail_encoder_decoder, - label2id={"N": 3}, - mode=mode, - loop_dummy_relation_name="L", - none_label="N", - ) - - if mode == "head_tail_label": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - ) == [4, 5, 1, 3, 3, 3, 3] - assert encoder_decoder.decode([4, 5, 1, 3, 3, 3, 3]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - elif mode == "tail_head_label": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - ) == [4, 5, -1, 3, 3, 3, 3] - assert encoder_decoder.decode([4, 5, -1, 3, 3, 3, 3]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - elif mode == "label_head_tail": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - ) == [3, 4, 5, 1, 3, 3, 3] - assert encoder_decoder.decode([3, 4, 5, 1, 3, 3, 3]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - elif mode == "label_tail_head": - assert encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, 
label="A"), - label="L", - ) - ) == [3, 4, 5, -1, 3, 3, 3] - assert encoder_decoder.decode([3, 4, 5, -1, 3, 3, 3]) == BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - else: - raise ValueError(f"unknown mode: {mode}") - - -@pytest.mark.parametrize( - "loop_dummy_relation_name,none_label", - [("L", None), (None, "N")], -) -def test_binary_relation_encoder_decoder_only_loop_or_none_label_provided( - loop_dummy_relation_name, none_label -): - """Test the BinaryRelationEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1, "N": 2} - labeled_span_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode="indices_label", - ) - encoder_decoder = BinaryRelationEncoderDecoder( - head_encoder_decoder=labeled_span_encoder_decoder, - tail_encoder_decoder=labeled_span_encoder_decoder, - label2id=label2id, - mode="head_tail_label", - loop_dummy_relation_name=loop_dummy_relation_name, - none_label=none_label, - ) - - if loop_dummy_relation_name is not None: - with pytest.raises(ValueError) as excinfo: - encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label=loop_dummy_relation_name, - ) - ) - - assert ( - str(excinfo.value) - == "loop_dummy_relation_name is set, but none_label is not set: None" - ) - elif none_label is not None: - none_id = label2id[none_label] - with pytest.raises(ValueError) as excinfo: - encoder_decoder.decode([4, 5, 1, none_id, none_id, none_id, none_id]) - assert ( - str(excinfo.value) - == "loop_dummy_relation_name is not set, but none_label=N was found in decoded encoding: " - "[4, 5, 1, 2, 2, 2, 2] (label2id: {'A': 0, 'B': 1, 'N': 2}))" - ) - else: - raise ValueError("unknown setting") - - -@pytest.mark.parametrize( - "loop_dummy_relation_name,none_label", - [(None, None), ("L", "N")], -) -def 
test_binary_relation_encoder_decoder_unknown_mode(loop_dummy_relation_name, none_label): - """Test the BinaryRelationEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1, "N": 2, "L": 3} - labeled_span_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode="indices_label", - ) - encoder_decoder = BinaryRelationEncoderDecoder( - head_encoder_decoder=labeled_span_encoder_decoder, - tail_encoder_decoder=labeled_span_encoder_decoder, - label2id=label2id, - mode="unknown", - loop_dummy_relation_name=loop_dummy_relation_name, - none_label=none_label, - ) - with pytest.raises(ValueError) as excinfo: - encoder_decoder.encode( - BinaryRelation( - head=LabeledSpan(start=1, end=2, label="A"), - tail=LabeledSpan(start=1, end=2, label="A"), - label="L", - ) - ) - assert str(excinfo.value) == "unknown mode: unknown" - - with pytest.raises(ValueError) as excinfo: - encoder_decoder.decode([2, 2, 2, 2, 2, 2, 2]) - assert str(excinfo.value) == "unknown mode: unknown" - - -def test_binary_relation_encoder_decoder_wrong_encoding_size(): - """Test the BinaryRelationEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1, "C": 2} - labeled_span_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode="indices_label", - ) - encoder_decoder = BinaryRelationEncoderDecoder( - head_encoder_decoder=labeled_span_encoder_decoder, - tail_encoder_decoder=labeled_span_encoder_decoder, - label2id=label2id, - mode="head_tail_label", - ) - with pytest.raises(DecodingLengthException) as excinfo: - encoder_decoder.decode([1, 2, 3, 4, 5, 6]) - assert ( - str(excinfo.value) - == "seven values are required to decode as BinaryRelation, but the encoding has length 6" - ) - assert excinfo.value.identifier == "len" - - with pytest.raises(DecodingLengthException) as excinfo: - encoder_decoder.decode([1, 2, 3, 4, 5, 6, 7, 8]) - 
assert ( - str(excinfo.value) - == "seven values are required to decode as BinaryRelation, but the encoding has length 8" - ) - assert excinfo.value.identifier == "len" - - -def test_binary_relation_encoder_decoder_wrong_label_index(): - """Test the BinaryRelationEncoderDecoder class.""" - - label2id = {"A": 0, "B": 1, "C": 2} - labeled_span_encoder_decoder = LabeledSpanEncoderDecoder( - span_encoder_decoder=SpanEncoderDecoderWithOffset(offset=len(label2id)), - label2id=label2id, - mode="indices_label", - ) - encoder_decoder = BinaryRelationEncoderDecoder( - head_encoder_decoder=labeled_span_encoder_decoder, - tail_encoder_decoder=labeled_span_encoder_decoder, - label2id=label2id, - mode="head_tail_label", - ) - with pytest.raises(DecodingLabelException) as excinfo: - encoder_decoder.decode([1, 2, 3, 4, 5, 6, 7]) - assert str(excinfo.value) == "unknown label id: 7 (label2id: {'A': 0, 'B': 1, 'C': 2})" - assert excinfo.value.identifier == "label" diff --git a/tests/taskmodules/pointer_network/test_logits_processor.py b/tests/taskmodules/pointer_network/test_logits_processor.py deleted file mode 100644 index 145fa1e4c..000000000 --- a/tests/taskmodules/pointer_network/test_logits_processor.py +++ /dev/null @@ -1,80 +0,0 @@ -import pytest -import torch - -from pie_modules.taskmodules.pointer_network.logits_processor import ( - FinitizeLogitsProcessor, - PrefixConstrainedLogitsProcessorWithMaximum, -) - - -def test_prefix_constrained_logits_processor_with_maximum(): - def allow_last_three(batch_id, sent, max_index): - return list(range(max_index - 3, max_index)) - - logits_processor = PrefixConstrainedLogitsProcessorWithMaximum( - prefix_allowed_tokens_fn=allow_last_three, num_beams=1 - ) - - input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7]]).to(dtype=torch.long) - scores = torch.tensor([[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.0]]).to(dtype=torch.float) - new_scores = logits_processor(input_ids, scores) - assert new_scores.shape == scores.shape - torch.testing.assert_close( - 
new_scores, - torch.tensor( - [[-float("inf"), -float("inf"), -float("inf"), -float("inf"), 0.9, 0.9, 0.0]] - ), - ) - - -def test_prefix_constrained_logits_processor_with_maximum_with_inf_scores(): - def allow_last_three(batch_id, sent, max_index): - return list(range(max_index - 3, max_index)) - - logits_processor = PrefixConstrainedLogitsProcessorWithMaximum( - prefix_allowed_tokens_fn=allow_last_three, num_beams=1 - ) - input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7]]).to(dtype=torch.long) - scores_with_pos_inf = torch.tensor([[0.9, 0.9, float("inf"), 0.9, 0.9, 0.9, 0.0]]).to( - dtype=torch.float - ) - scores_with_neg_inf = torch.tensor([[0.9, 0.9, -float("inf"), 0.9, 0.9, 0.9, 0.0]]).to( - dtype=torch.float - ) - - with pytest.raises(ValueError, match="scores contains ±inf or NaN"): - logits_processor(input_ids, scores_with_pos_inf) - - with pytest.raises(ValueError, match="scores contains ±inf or NaN"): - logits_processor(input_ids, scores_with_neg_inf) - - -def test_prefix_constrained_logits_processor_with_maximum_without_allowed_tokens(): - def allow_no_tokens(batch_id, sent, max_index): - return [] - - logits_processor = PrefixConstrainedLogitsProcessorWithMaximum( - prefix_allowed_tokens_fn=allow_no_tokens, num_beams=1 - ) - - input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7]]).to(dtype=torch.long) - scores = torch.tensor([[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.0]]).to(dtype=torch.float) - - with pytest.raises(ValueError, match="No allowed token ids for batch_id"): - logits_processor(input_ids, scores) - - -def test_finitize_logits_processor(): - logits_processor = FinitizeLogitsProcessor() - - input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7]]).to(dtype=torch.long) - scores = torch.tensor([[0.9, 0.9, float("inf"), 0.9, 0.9, -float("inf"), 0.0]]).to( - dtype=torch.float - ) - new_scores = logits_processor(input_ids, scores) - - assert new_scores.shape == scores.shape - torch.testing.assert_close( - new_scores, - torch.tensor([[0.9, 0.9, 3.4028235e38, 0.9, 0.9, 
-3.4028235e38, 0.0]]), - ) diff --git a/tests/taskmodules/test_cross_text_binary_coref.py b/tests/taskmodules/test_cross_text_binary_coref.py deleted file mode 100644 index 065f9b519..000000000 --- a/tests/taskmodules/test_cross_text_binary_coref.py +++ /dev/null @@ -1,395 +0,0 @@ -import json -from typing import Any, Dict, Union - -import pytest -import torch.testing -from pie_core.utils.dictionary import flatten_dict_s, list_of_dicts2dict_of_lists -from torch import tensor -from torchmetrics import Metric, MetricCollection - -from pie_modules.annotations import LabeledSpan -from pie_modules.document.processing.text_pair import add_negative_coref_relations -from pie_modules.documents import ( - BinaryCorefRelation, - TextPairDocumentWithLabeledSpansAndBinaryCorefRelations, -) -from pie_modules.taskmodules import CrossTextBinaryCorefTaskModule -from tests import FIXTURES_ROOT, _config_to_str - -TOKENIZER_NAME_OR_PATH = "bert-base-cased" -DOC_IDX_WITH_TASK_ENCODINGS = 2 - -CONFIGS = [ - {}, -] -CONFIGS_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIGS_DICT.keys()) -def config(request): - return CONFIGS_DICT[request.param] - - -@pytest.fixture(scope="module") -def positive_documents(): - doc1 = TextPairDocumentWithLabeledSpansAndBinaryCorefRelations( - id="0", text="Entity A works at B.", text_pair="And she founded C." - ) - doc1.labeled_spans.append(LabeledSpan(start=0, end=8, label="PERSON")) - doc1.labeled_spans.append(LabeledSpan(start=18, end=19, label="COMPANY")) - doc1.labeled_spans_pair.append(LabeledSpan(start=4, end=7, label="PERSON")) - doc1.labeled_spans_pair.append(LabeledSpan(start=16, end=17, label="COMPANY")) - doc1.binary_coref_relations.append( - BinaryCorefRelation(head=doc1.labeled_spans[0], tail=doc1.labeled_spans_pair[0]) - ) - - doc2 = TextPairDocumentWithLabeledSpansAndBinaryCorefRelations( - id="0", text="Bob loves his cat.", text_pair="She sleeps a lot." 
- ) - doc2.labeled_spans.append(LabeledSpan(start=0, end=3, label="PERSON")) - doc2.labeled_spans.append(LabeledSpan(start=10, end=17, label="ANIMAL")) - doc2.labeled_spans_pair.append(LabeledSpan(start=0, end=3, label="ANIMAL")) - doc2.binary_coref_relations.append( - BinaryCorefRelation(head=doc2.labeled_spans[1], tail=doc2.labeled_spans_pair[0]) - ) - - return [doc1, doc2] - - -def test_positive_documents(positive_documents): - assert len(positive_documents) == 2 - doc1, doc2 = positive_documents - assert doc1.labeled_spans.resolve() == [("PERSON", "Entity A"), ("COMPANY", "B")] - assert doc1.labeled_spans_pair.resolve() == [("PERSON", "she"), ("COMPANY", "C")] - assert doc1.binary_coref_relations.resolve() == [ - ("coref", (("PERSON", "Entity A"), ("PERSON", "she"))) - ] - - assert doc2.labeled_spans.resolve() == [("PERSON", "Bob"), ("ANIMAL", "his cat")] - assert doc2.labeled_spans_pair.resolve() == [("ANIMAL", "She")] - assert doc2.binary_coref_relations.resolve() == [ - ("coref", (("ANIMAL", "his cat"), ("ANIMAL", "She"))) - ] - - -@pytest.fixture(scope="module") -def unprepared_taskmodule(config): - taskmodule = CrossTextBinaryCorefTaskModule( - tokenizer_name_or_path=TOKENIZER_NAME_OR_PATH, **config - ) - assert not taskmodule.is_from_pretrained - - return taskmodule - - -@pytest.fixture(scope="module") -def taskmodule(unprepared_taskmodule, positive_documents): - unprepared_taskmodule.prepare(positive_documents) - return unprepared_taskmodule - - -@pytest.fixture(scope="module") -def documents_with_negatives(taskmodule, positive_documents): - file_name = ( - FIXTURES_ROOT / "taskmodules" / "cross_text_binary_coref" / "documents_with_negatives.json" - ) - - # result = list(add_negative_relations(positive_documents)) - # result_json = [doc.asdict() for doc in result] - # with open(file_name, "w") as f: - # json.dump(result_json, f, indent=2) - - with open(file_name) as f: - result_json = json.load(f) - result = [ - 
TextPairDocumentWithLabeledSpansAndBinaryCorefRelations.fromdict(doc_json) - for doc_json in result_json - ] - - return result - - -@pytest.fixture(scope="module") -def task_encodings_without_target(taskmodule, documents_with_negatives): - task_encodings = taskmodule.encode_input(documents_with_negatives[DOC_IDX_WITH_TASK_ENCODINGS]) - return task_encodings - - -def test_encode_input(task_encodings_without_target, taskmodule): - task_encodings = task_encodings_without_target - convert_ids_to_tokens = taskmodule.tokenizer.convert_ids_to_tokens - - inputs_dict = list_of_dicts2dict_of_lists( - [task_encoding.inputs for task_encoding in task_encodings] - ) - tokens = [convert_ids_to_tokens(encoding["input_ids"]) for encoding in inputs_dict["encoding"]] - tokens_pair = [ - convert_ids_to_tokens(encoding["input_ids"]) for encoding in inputs_dict["encoding_pair"] - ] - assert tokens == [ - ["[CLS]", "And", "she", "founded", "C", ".", "[SEP]"], - ["[CLS]", "And", "she", "founded", "C", ".", "[SEP]"], - ] - assert tokens_pair == [ - ["[CLS]", "En", "##ti", "##ty", "A", "works", "at", "B", ".", "[SEP]"], - ["[CLS]", "En", "##ti", "##ty", "A", "works", "at", "B", ".", "[SEP]"], - ] - span_tokens = [ - toks[start:end] - for toks, start, end in zip( - tokens, inputs_dict["pooler_start_indices"], inputs_dict["pooler_end_indices"] - ) - ] - span_tokens_pair = [ - toks[start:end] - for toks, start, end in zip( - tokens_pair, - inputs_dict["pooler_pair_start_indices"], - inputs_dict["pooler_pair_end_indices"], - ) - ] - assert span_tokens == [["she"], ["C"]] - assert span_tokens_pair == [["En", "##ti", "##ty", "A"], ["B"]] - - -def test_encode_target(task_encodings_without_target, taskmodule): - targets = [ - taskmodule.encode_target(task_encoding) for task_encoding in task_encodings_without_target - ] - assert targets == [1.0, 0.0] - - -def test_encode_with_collect_statistics(taskmodule, positive_documents): - documents_with_negatives = 
add_negative_coref_relations(positive_documents) - original_values = taskmodule.collect_statistics - taskmodule.collect_statistics = True - taskmodule.encode(documents_with_negatives, encode_target=True) - statistics = taskmodule.get_statistics() - taskmodule.collect_statistics = original_values - - assert statistics == { - ("available", "coref"): 4, - ("available", "no_relation"): 6, - ("used", "coref"): 4, - ("used", "no_relation"): 6, - } - - -def test_encode_with_windowing(documents_with_negatives): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = CrossTextBinaryCorefTaskModule( - tokenizer_name_or_path=tokenizer_name_or_path, - max_window=4, - collect_statistics=True, - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents_with_negatives) - - assert len(documents_with_negatives) == 16 - - task_encodings = taskmodule.encode(documents_with_negatives) - statistics = taskmodule.get_statistics() - - assert statistics == { - ("available", "coref"): 4, - ("available", "no_relation"): 6, - ("skipped_span_does_not_fit_into_window", "coref"): 2, - ("skipped_span_does_not_fit_into_window", "no_relation"): 2, - ("used", "coref"): 2, - ("used", "no_relation"): 4, - } - - assert len(task_encodings) == 6 - for task_encoding in task_encodings: - for k, v in task_encoding.inputs["encoding"].items(): - assert len(v) <= taskmodule.max_window - for k, v in task_encoding.inputs["encoding_pair"].items(): - assert len(v) <= taskmodule.max_window - - -@pytest.fixture(scope="module") -def task_encodings(taskmodule, documents_with_negatives): - return taskmodule.encode( - documents_with_negatives[DOC_IDX_WITH_TASK_ENCODINGS], encode_target=True - ) - - -@pytest.fixture(scope="module") -def batch(taskmodule, task_encodings): - result = taskmodule.collate(task_encodings) - return result - - -def test_collate(batch, taskmodule): - assert batch is not None - inputs, targets = batch - assert inputs is not None - assert set(inputs) == { - 
"pooler_end_indices", - "encoding_pair", - "pooler_pair_end_indices", - "pooler_start_indices", - "encoding", - "pooler_pair_start_indices", - } - torch.testing.assert_close( - inputs["encoding"]["input_ids"], - torch.tensor( - [[101, 1262, 1131, 1771, 140, 119, 102], [101, 1262, 1131, 1771, 140, 119, 102]] - ), - ) - torch.testing.assert_close( - inputs["encoding"]["token_type_ids"], torch.zeros_like(inputs["encoding"]["input_ids"]) - ) - torch.testing.assert_close( - inputs["encoding"]["attention_mask"], torch.ones_like(inputs["encoding"]["input_ids"]) - ) - - torch.testing.assert_close( - inputs["encoding_pair"]["input_ids"], - torch.tensor( - [ - [101, 13832, 3121, 2340, 138, 1759, 1120, 139, 119, 102], - [101, 13832, 3121, 2340, 138, 1759, 1120, 139, 119, 102], - ] - ), - ) - torch.testing.assert_close( - inputs["encoding_pair"]["token_type_ids"], - torch.zeros_like(inputs["encoding_pair"]["input_ids"]), - ) - torch.testing.assert_close( - inputs["encoding_pair"]["attention_mask"], - torch.ones_like(inputs["encoding_pair"]["input_ids"]), - ) - - torch.testing.assert_close(inputs["pooler_start_indices"], torch.tensor([[2], [4]])) - torch.testing.assert_close(inputs["pooler_end_indices"], torch.tensor([[3], [5]])) - torch.testing.assert_close(inputs["pooler_pair_start_indices"], torch.tensor([[1], [7]])) - torch.testing.assert_close(inputs["pooler_pair_end_indices"], torch.tensor([[5], [8]])) - - torch.testing.assert_close(targets, {"scores": torch.tensor([1.0, 0.0])}) - - -@pytest.fixture(scope="module") -def unbatched_output(taskmodule): - model_output = { - "scores": torch.tensor([0.5338148474693298, 0.9866107940673828]), - } - return taskmodule.unbatch_output(model_output=model_output) - - -def test_unbatch_output(unbatched_output, taskmodule): - assert len(unbatched_output) == 2 - assert unbatched_output == [ - {"is_similar": False, "score": 0.5338148474693298}, - {"is_similar": True, "score": 0.9866107702255249}, - ] - - -def 
test_create_annotation_from_output(taskmodule, task_encodings, unbatched_output): - all_new_annotations = [] - for task_encoding, task_output in zip(task_encodings, unbatched_output): - for new_annotation in taskmodule.create_annotations_from_output( - task_encoding=task_encoding, task_output=task_output - ): - all_new_annotations.append(new_annotation) - assert all(layer_name == "binary_coref_relations" for layer_name, ann in all_new_annotations) - resolve_annotations_with_scores = [ - (round(ann.score, 4), ann.resolve()) for layer_name, ann in all_new_annotations - ] - assert resolve_annotations_with_scores == [ - (0.9866, ("coref", (("COMPANY", "C"), ("COMPANY", "B")))), - ] - - -def get_metric_state(metric_or_collection: Union[Metric, MetricCollection]) -> Dict[str, Any]: - if isinstance(metric_or_collection, Metric): - return flatten_dict_s(metric_or_collection.metric_state) - elif isinstance(metric_or_collection, MetricCollection): - return flatten_dict_s({k: get_metric_state(v) for k, v in metric_or_collection.items()}) - else: - raise ValueError(f"unsupported type: {type(metric_or_collection)}") - - -def test_configure_metric(taskmodule, batch): - metric = taskmodule.configure_model_metric(stage="train") - - assert isinstance(metric, (Metric, MetricCollection)) - state = get_metric_state(metric) - torch.testing.assert_close( - state, - { - "continuous/auroc/preds": [], - "continuous/auroc/target": [], - "continuous/avg-P/preds": [], - "continuous/avg-P/target": [], - "continuous/f1/fn": tensor([0]), - "continuous/f1/fp": tensor([0]), - "continuous/f1/tn": tensor([0]), - "continuous/f1/tp": tensor([0]), - }, - ) - - # targets = batch[1] - targets = { - "scores": torch.tensor([0.0, 1.0, 0.0, 0.0]), - } - metric.update(targets, targets) - - state = get_metric_state(metric) - torch.testing.assert_close( - state, - { - "continuous/auroc/preds": [tensor([0.0, 1.0, 0.0, 0.0])], - "continuous/auroc/target": [tensor([0.0, 1.0, 0.0, 0.0])], - 
"continuous/avg-P/preds": [tensor([0.0, 1.0, 0.0, 0.0])], - "continuous/avg-P/target": [tensor([0.0, 1.0, 0.0, 0.0])], - "continuous/f1/tp": tensor([1]), - "continuous/f1/fp": tensor([0]), - "continuous/f1/tn": tensor([3]), - "continuous/f1/fn": tensor([0]), - }, - ) - - torch.testing.assert_close( - metric.compute(), - {"auroc": tensor(1.0), "avg-P": tensor(1.0), "f1": tensor(1.0)}, - ) - - # torch.rand_like(targets) - random_targets = { - "scores": torch.tensor([0.2703, 0.6812, 0.2582, 0.9030]), - } - metric.update(random_targets, targets) - state = get_metric_state(metric) - torch.testing.assert_close( - state, - { - "continuous/auroc/preds": [ - tensor([0.0, 1.0, 0.0, 0.0]), - tensor([0.2703, 0.6812, 0.2582, 0.9030]), - ], - "continuous/auroc/target": [ - tensor([0.0, 1.0, 0.0, 0.0]), - tensor([0.0, 1.0, 0.0, 0.0]), - ], - "continuous/avg-P/preds": [ - tensor([0.0, 1.0, 0.0, 0.0]), - tensor([0.2703, 0.6812, 0.2582, 0.9030]), - ], - "continuous/avg-P/target": [ - tensor([0.0, 1.0, 0.0, 0.0]), - tensor([0.0, 1.0, 0.0, 0.0]), - ], - "continuous/f1/tp": tensor([1]), - "continuous/f1/fp": tensor([1]), - "continuous/f1/tn": tensor([5]), - "continuous/f1/fn": tensor([1]), - }, - ) - - torch.testing.assert_close( - metric.compute(), - {"auroc": tensor(0.91666663), "avg-P": tensor(0.83333337), "f1": tensor(0.50000000)}, - ) diff --git a/tests/taskmodules/test_extractive_question_answering.py b/tests/taskmodules/test_extractive_question_answering.py deleted file mode 100644 index d38e532d6..000000000 --- a/tests/taskmodules/test_extractive_question_answering.py +++ /dev/null @@ -1,277 +0,0 @@ -import pytest -import torch -import transformers -from pie_core import AnnotationLayer - -from pie_modules.annotations import ExtractiveAnswer, Question -from pie_modules.documents import TextDocumentWithQuestionsAndExtractiveAnswers -from pie_modules.taskmodules.extractive_question_answering import ( - ExtractiveQuestionAnsweringTaskModule, -) - - -@pytest.fixture() -def 
document(): - document = TextDocumentWithQuestionsAndExtractiveAnswers( - text="This is a test document", id="doc0" - ) - document.questions.append(Question(text="What is the first word?")) - document.answers.append(ExtractiveAnswer(question=document.questions[0], start=0, end=4)) - assert str(document.answers[0]) == "This" - return document - - -@pytest.fixture() -def document1(): - document1 = TextDocumentWithQuestionsAndExtractiveAnswers( - text="This is the second document", id="doc1" - ) - document1.questions.append(Question(text="Which document is this?")) - document1.answers.append(ExtractiveAnswer(question=document1.questions[0], start=13, end=18)) - assert str(document1.answers[0]) == "second" - return document1 - - -@pytest.fixture() -def document_with_no_answer(): - document = TextDocumentWithQuestionsAndExtractiveAnswers( - text="This is a test document", id="document_with_no_answer" - ) - document.questions.append(Question(text="What is the first word?")) - return document - - -@pytest.fixture() -def document_with_multiple_answers(): - document = TextDocumentWithQuestionsAndExtractiveAnswers( - text="This is a test document", id="document_with_multiple_answers" - ) - document.questions.append(Question(text="What is the first word?")) - document.answers.append(ExtractiveAnswer(question=document.questions[0], start=0, end=4)) - assert str(document.answers[0]) == "This" - document.answers.append(ExtractiveAnswer(question=document.questions[0], start=0, end=7)) - assert str(document.answers[1]) == "This is" - return document - - -@pytest.fixture() -def taskmodule(): - return ExtractiveQuestionAnsweringTaskModule( - tokenizer_name_or_path="bert-base-uncased", max_length=128 - ) - - -def test_encode_input( - taskmodule, document, document_with_no_answer, document_with_multiple_answers -): - inputs = taskmodule.encode_input(document) - assert inputs is not None - assert len(inputs) == 1 - expected_inputs = [ - 101, - 2054, - 2003, - 1996, - 2034, - 2773, - 
1029, - 102, - 2023, - 2003, - 1037, - 3231, - 6254, - 102, - ] - assert inputs[0].inputs == expected_inputs - - inputs = taskmodule.encode_input(document_with_no_answer) - assert inputs is not None - assert len(inputs) == 1 - assert inputs[0].inputs == expected_inputs - - inputs = taskmodule.encode_input(document_with_multiple_answers) - assert inputs is not None - assert len(inputs) == 1 - assert inputs[0].inputs == expected_inputs - - -def test_encode_target(taskmodule, document, document_with_no_answer): - inputs = taskmodule.encode_input(document) - targets = taskmodule.encode_target(inputs[0]) - assert targets is not None - assert targets.start_position == 8 - assert targets.end_position == 8 - - inputs = taskmodule.encode_input(document_with_no_answer) - targets = taskmodule.encode_target(inputs[0]) - assert targets is not None - assert targets.start_position == 0 - assert targets.end_position == 0 - - -def test_get_question_layer(taskmodule, document, document_with_no_answer): - question_layer = taskmodule.get_question_layer(document) - assert question_layer is not None - assert len(question_layer) == 1 - assert type(question_layer) is AnnotationLayer - assert type(question_layer[0]) is Question - assert question_layer[0].text == "What is the first word?" - - question_layer = taskmodule.get_question_layer(document_with_no_answer) - assert question_layer is not None - assert len(question_layer) == 1 - assert type(question_layer) is AnnotationLayer - assert type(question_layer[0]) is Question - assert question_layer[0].text == "What is the first word?" - - -def test_get_answer_layer(taskmodule, document, document_with_no_answer): - answer_layer = taskmodule.get_answer_layer(document) - assert answer_layer is not None - assert len(answer_layer) == 1 - assert type(answer_layer) is AnnotationLayer - assert type(answer_layer[0]) is ExtractiveAnswer - assert answer_layer[0].question.text == "What is the first word?" 
- assert answer_layer[0].start == 0 - assert answer_layer[0].end == 4 - - answer_layer = taskmodule.get_answer_layer(document_with_no_answer) - assert answer_layer is not None - assert len(answer_layer) == 0 - assert type(answer_layer) is AnnotationLayer - - -def test_get_context(taskmodule, document, document_with_no_answer): - context = taskmodule.get_context(document) - assert context is not None - assert context == "This is a test document" - - context = taskmodule.get_context(document_with_no_answer) - assert context is not None - assert context == "This is a test document" - - -@pytest.fixture() -def documents(document, document_with_no_answer): - return [document, document_with_no_answer] - - -@pytest.fixture() -def batch_without_targets(taskmodule, documents): - task_encodings = taskmodule.encode(documents) - batch_encoding = taskmodule.collate(task_encodings) - return batch_encoding - - -def test_collate_without_targets(batch_without_targets): - assert batch_without_targets is not None - assert len(batch_without_targets) == 2 - inputs, targets = batch_without_targets - assert inputs is not None - assert targets is None - - -@pytest.fixture() -def task_encodings(taskmodule, documents): - task_encodings = taskmodule.encode(documents, encode_target=True) - return task_encodings - - -@pytest.fixture() -def batch(taskmodule, task_encodings): - batch_encoding = taskmodule.collate(task_encodings) - return batch_encoding - - -def test_collate_with_targets(batch): - assert batch is not None - assert len(batch) == 2 - inputs, targets = batch - assert inputs is not None - assert set(inputs.data) == {"input_ids", "token_type_ids", "attention_mask"} - assert inputs.data["input_ids"].shape == (2, 14) - assert inputs.data["token_type_ids"].shape == (2, 14) - assert inputs.data["attention_mask"].shape == (2, 14) - assert targets is not None - assert set(targets) == {"start_positions", "end_positions"} - assert targets["start_positions"].shape == (2,) - assert 
targets["end_positions"].shape == (2,) - - expected_inputs_ids = [ - [101, 2054, 2003, 1996, 2034, 2773, 1029, 102, 2023, 2003, 1037, 3231, 6254, 102], - [101, 2054, 2003, 1996, 2034, 2773, 1029, 102, 2023, 2003, 1037, 3231, 6254, 102], - ] - expected_token_type_ids = [ - [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - ] - expected_attention_mask = [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - assert inputs.data["input_ids"].tolist() == expected_inputs_ids - assert inputs.data["token_type_ids"].tolist() == expected_token_type_ids - assert inputs.data["attention_mask"].tolist() == expected_attention_mask - - expected_start_positions = [8, 0] - expected_end_positions = [8, 0] - assert targets["start_positions"].tolist() == expected_start_positions - assert targets["end_positions"].tolist() == expected_end_positions - - -@pytest.fixture() -def model_outputs(batch): - # create probabilities that "perfectly" model the batch targets - inputs, targets = batch - start_probs = torch.zeros_like(inputs.input_ids, dtype=torch.float) + 0.05 - end_probs = torch.zeros_like(inputs.input_ids, dtype=torch.float) + 0.05 - # set target positions to 0.95 as a dummy value - for idx, (start_position, end_position) in enumerate( - zip(targets["start_positions"], targets["end_positions"]) - ): - start_probs[idx, start_position] = 0.95 - end_probs[idx, end_position] = 0.95 - - # convert probs to logits - start_logits = torch.log(start_probs / (1 - start_probs)) - end_logits = torch.log(end_probs / (1 - end_probs)) - - model_outputs = transformers.modeling_outputs.QuestionAnsweringModelOutput( - start_logits=start_logits, - end_logits=end_logits, - ) - return model_outputs - - -@pytest.fixture() -def unbatched_output(taskmodule, model_outputs): - return taskmodule.unbatch_output(model_outputs) - - -def test_unbatch_output(unbatched_output): - assert unbatched_output is not None - assert 
len(unbatched_output) == 2 - # check first result - assert unbatched_output[0].start == 8 - assert unbatched_output[0].end == 8 - assert unbatched_output[0].start_probability == pytest.approx(0.9652407) - assert unbatched_output[0].end_probability == pytest.approx(0.9652407) - # check second result - assert unbatched_output[1].start == 0 - assert unbatched_output[1].end == 0 - assert unbatched_output[1].start_probability == pytest.approx(0.9652407) - assert unbatched_output[1].end_probability == pytest.approx(0.9652407) - - -def test_create_annotations_from_output(taskmodule, task_encodings, unbatched_output, documents): - taskmodule.combine_outputs(task_encodings, unbatched_output) - assert len(documents) > 0 - for doc in documents: - gold_annotations = doc.answers - predicted_annotations = doc.answers.predictions - assert len(predicted_annotations) == len(gold_annotations) - for predicted_annotation, gold_annotation in zip(predicted_annotations, gold_annotations): - # we did construct the predicted annotations from the gold annotations, so they should be equal - assert predicted_annotation == gold_annotation - assert predicted_annotation.score == pytest.approx(0.9316896200180054) diff --git a/tests/taskmodules/test_labeled_span_extraction_by_token_classification.py b/tests/taskmodules/test_labeled_span_extraction_by_token_classification.py deleted file mode 100644 index b05374a9b..000000000 --- a/tests/taskmodules/test_labeled_span_extraction_by_token_classification.py +++ /dev/null @@ -1,883 +0,0 @@ -import logging -import pickle -from collections import defaultdict -from dataclasses import dataclass -from typing import Any, Dict, List - -import pytest -import torch -from pie_core import AnnotationLayer, annotation_field -from torch import tensor -from transformers import BatchEncoding - -from pie_modules.annotations import LabeledSpan -from pie_modules.documents import ( - TextBasedDocument, - TextDocumentWithLabeledSpans, - 
TextDocumentWithLabeledSpansAndLabeledPartitions, -) -from pie_modules.taskmodules import LabeledSpanExtractionByTokenClassificationTaskModule -from pie_modules.taskmodules.labeled_span_extraction_by_token_classification import ( - ModelOutputType, -) - - -def _config_to_str(cfg: Dict[str, Any]) -> str: - # Converts a configuration dictionary to a string representation - result = "-".join([f"{k}={cfg[k]}" for k in sorted(cfg)]) - return result - - -CONFIG_DEFAULT = {} -CONFIG_MAX_WINDOW = { - "tokenize_kwargs": {"max_length": 8, "truncation": True, "return_overflowing_tokens": True} -} -CONFIG_MAX_WINDOW_WITH_STRIDE = { - "tokenize_kwargs": { - "max_length": 8, - "stride": 2, - "truncation": True, - "return_overflowing_tokens": True, - } -} -CONFIG_PARTITIONS = {"partition_annotation": "sentences"} - -CONFIGS: List[Dict[str, Any]] = [ - CONFIG_DEFAULT, - CONFIG_MAX_WINDOW, - CONFIG_MAX_WINDOW_WITH_STRIDE, - CONFIG_PARTITIONS, -] - -CONFIGS_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIGS_DICT.keys()) -def config(request): - """ - - Provides clean and readable test configurations. - - Yields config dictionaries from the CONFIGS list to produce clean test case identifiers. - - """ - return CONFIGS_DICT[request.param] - - -@pytest.fixture(scope="module") -def config_str(config): - # Fixture returning a string representation of the config - return _config_to_str(config) - - -@pytest.fixture(scope="module") -def unprepared_taskmodule(config): - """ - - Prepares a task module with the specified tokenizer and configuration. - - Sets up the task module with a unprepared state for testing purposes. 
- - """ - return LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", span_annotation="entities", **config - ) - - -@dataclass -class ExampleDocument(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - sentences: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - - -@pytest.fixture(scope="module") -def documents(): - """ - - Creates example documents with predefined texts. - - Assigns labels to the documents for testing purposes. - - """ - doc1 = ExampleDocument(text="Mount Everest is the highest peak in the world.", id="doc1") - doc1.entities.append(LabeledSpan(start=0, end=13, label="LOC")) - assert str(doc1.entities[0]) == "Mount Everest" - - doc2 = ExampleDocument(text="Alice loves reading books. Bob enjoys playing soccer.", id="doc2") - doc2.entities.append(LabeledSpan(start=0, end=5, label="PER")) - assert str(doc2.entities[0]) == "Alice" - doc2.entities.append(LabeledSpan(start=27, end=30, label="PER")) - assert str(doc2.entities[1]) == "Bob" - # we add just one sentence to doc2 that covers only Bob - doc2.sentences.append(LabeledSpan(start=27, end=53, label="sentence")) - assert str(doc2.sentences[0]) == "Bob enjoys playing soccer." - - return [doc1, doc2] - - -def test_taskmodule(unprepared_taskmodule): - assert unprepared_taskmodule is not None - - -@pytest.fixture(scope="module") -def taskmodule(unprepared_taskmodule, documents): - """ - - Prepares the task module with the given documents, i.e. collect available label values. - - Calls the necessary methods to prepare the task module with the documents. 
- - Calls _prepare(documents) and then _post_prepare() - - """ - unprepared_taskmodule.prepare(documents) - return unprepared_taskmodule - - -def test_prepare(taskmodule): - assert taskmodule is not None - assert taskmodule.is_prepared - assert taskmodule.label_to_id == {"B-LOC": 1, "B-PER": 3, "I-LOC": 2, "I-PER": 4, "O": 0} - assert taskmodule.id_to_label == {0: "O", 1: "B-LOC", 2: "I-LOC", 3: "B-PER", 4: "I-PER"} - - -def test_config(taskmodule): - config = taskmodule._config() - assert config["taskmodule_type"] == "LabeledSpanExtractionByTokenClassificationTaskModule" - assert "labels" in config - assert config["labels"] == ["LOC", "PER"] - - -@pytest.fixture(scope="module") -def task_encodings_without_targets(taskmodule, documents): - """ - - Generates task encodings for all the documents, but without associated targets. - """ - return taskmodule.encode(documents, encode_target=False) - - -def test_task_encodings_without_targets(task_encodings_without_targets, taskmodule, config): - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(task_encoding.inputs.ids) - for task_encoding in task_encodings_without_targets - ] - - # If config is empty - if config == CONFIG_DEFAULT: - assert tokens == [ - [ - "[CLS]", - "mount", - "everest", - "is", - "the", - "highest", - "peak", - "in", - "the", - "world", - ".", - "[SEP]", - ], - [ - "[CLS]", - "alice", - "loves", - "reading", - "books", - ".", - "bob", - "enjoys", - "playing", - "soccer", - ".", - "[SEP]", - ], - ] - - # If config has the specified values (max_window=8, window_overlap=2) - elif config == CONFIG_MAX_WINDOW_WITH_STRIDE: - for t in tokens: - assert len(t) <= 8 - - assert tokens == [ - ["[CLS]", "mount", "everest", "is", "the", "highest", "peak", "[SEP]"], - ["[CLS]", "highest", "peak", "in", "the", "world", ".", "[SEP]"], - ["[CLS]", "alice", "loves", "reading", "books", ".", "bob", "[SEP]"], - ["[CLS]", ".", "bob", "enjoys", "playing", "soccer", ".", "[SEP]"], - ] - - # If config has the specified 
value (max_window=8) - elif config == CONFIG_MAX_WINDOW: - for t in tokens: - assert len(t) <= 8 - - assert tokens == [ - ["[CLS]", "mount", "everest", "is", "the", "highest", "peak", "[SEP]"], - ["[CLS]", "in", "the", "world", ".", "[SEP]"], - ["[CLS]", "alice", "loves", "reading", "books", ".", "bob", "[SEP]"], - ["[CLS]", "enjoys", "playing", "soccer", ".", "[SEP]"], - ] - - # If config has the specified value (partition_annotation=sentences) - elif config == CONFIG_PARTITIONS: - assert tokens - - else: - raise ValueError(f"unknown config: {config}") - - -@pytest.fixture(scope="module") -def task_encodings(taskmodule, documents): - return taskmodule.encode(documents, encode_target=True) - - -def test_task_encodings(task_encodings, taskmodule, config): - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(task_encoding.inputs.ids) - for task_encoding in task_encodings - ] - labels_tokens = [ - [taskmodule.id_to_label[x] if x != -100 else "" for x in task_encoding.targets] - for task_encoding in task_encodings - ] - assert len(labels_tokens) == len(tokens) - - tokens_with_labels = list(zip(tokens, labels_tokens)) - - for tokens, labels in tokens_with_labels: - assert len(tokens) == len(labels) - - # If config is empty - if config == CONFIG_DEFAULT: - assert tokens_with_labels == [ - ( - [ - "[CLS]", - "mount", - "everest", - "is", - "the", - "highest", - "peak", - "in", - "the", - "world", - ".", - "[SEP]", - ], - ["", "B-LOC", "I-LOC", "O", "O", "O", "O", "O", "O", "O", "O", ""], - ), - ( - [ - "[CLS]", - "alice", - "loves", - "reading", - "books", - ".", - "bob", - "enjoys", - "playing", - "soccer", - ".", - "[SEP]", - ], - ["", "B-PER", "O", "O", "O", "O", "B-PER", "O", "O", "O", "O", ""], - ), - ] - - # If config has the specified values (max_window=8, window_overlap=2) - elif config == CONFIG_MAX_WINDOW_WITH_STRIDE: - for tokens, labels in tokens_with_labels: - assert len(tokens) <= 8 - - assert tokens_with_labels == [ - ( - ["[CLS]", "mount", "everest", 
"is", "the", "highest", "peak", "[SEP]"], - ["", "B-LOC", "I-LOC", "O", "O", "O", "O", ""], - ), - ( - ["[CLS]", "highest", "peak", "in", "the", "world", ".", "[SEP]"], - ["", "O", "O", "O", "O", "O", "O", ""], - ), - ( - ["[CLS]", "alice", "loves", "reading", "books", ".", "bob", "[SEP]"], - ["", "B-PER", "O", "O", "O", "O", "B-PER", ""], - ), - ( - ["[CLS]", ".", "bob", "enjoys", "playing", "soccer", ".", "[SEP]"], - ["", "O", "B-PER", "O", "O", "O", "O", ""], - ), - ] - - # If config has the specified value (max_window=8) - elif config == CONFIG_MAX_WINDOW: - for tokens, labels in tokens_with_labels: - assert len(tokens) <= 8 - - assert tokens_with_labels == [ - ( - ["[CLS]", "mount", "everest", "is", "the", "highest", "peak", "[SEP]"], - ["", "B-LOC", "I-LOC", "O", "O", "O", "O", ""], - ), - ( - ["[CLS]", "in", "the", "world", ".", "[SEP]"], - ["", "O", "O", "O", "O", ""], - ), - ( - ["[CLS]", "alice", "loves", "reading", "books", ".", "bob", "[SEP]"], - ["", "B-PER", "O", "O", "O", "O", "B-PER", ""], - ), - ( - ["[CLS]", "enjoys", "playing", "soccer", ".", "[SEP]"], - ["", "O", "O", "O", "O", ""], - ), - ] - - # If config has the specified value (partition_annotation=sentences) - elif config == CONFIG_PARTITIONS: - assert tokens_with_labels == [ - ( - ["[CLS]", "bob", "enjoys", "playing", "soccer", ".", "[SEP]"], - ["", "B-PER", "O", "O", "O", "O", ""], - ) - ] - - else: - raise ValueError(f"unknown config: {config}") - - -def test_encode_targets_with_overlap(caplog): - # setup taskmodule - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", labels=["LOC", "PER"] - ) - taskmodule.post_prepare() - - # create a document with overlapping entities - doc = TextDocumentWithLabeledSpans( - text="Alice loves reading books. Bob enjoys playing soccer." 
- ) - doc.labeled_spans.append(LabeledSpan(start=0, end=5, label="PER")) - doc.labeled_spans.append(LabeledSpan(start=27, end=30, label="PER")) - doc.labeled_spans.append(LabeledSpan(start=27, end=37, label="PER")) - assert str(doc.labeled_spans[0]) == "Alice" - assert str(doc.labeled_spans[1]) == "Bob" - assert str(doc.labeled_spans[2]) == "Bob enjoys" - - # encode the document - with caplog.at_level(logging.WARNING): - task_encodings = taskmodule.encode([doc], encode_target=True) - assert len(caplog.records) == 1 - assert ( - caplog.messages[0] - == "tag already assigned (current span has an overlap: ('bob', 'enjoys'))." - ) - assert len(task_encodings) == 1 - assert task_encodings[0].targets == [-100, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, -100] - - -@pytest.fixture(scope="module") -def task_encodings_for_batch(task_encodings, config): - # just take everything we have - return task_encodings - - -@pytest.fixture(scope="module") -def batch(taskmodule, task_encodings_for_batch, config) -> BatchEncoding: - return taskmodule.collate(task_encodings_for_batch) - - -def test_collate(batch, config): - assert batch is not None - assert len(batch) == 2 - inputs, targets = batch - - assert set(inputs.data) == {"input_ids", "attention_mask", "special_tokens_mask"} - input_ids_list = inputs.input_ids.tolist() - attention_mask_list = inputs.attention_mask.tolist() - special_tokens_mask_list = inputs.special_tokens_mask.tolist() - assert set(targets) == {"labels"} - labels_list = targets["labels"].tolist() - - # If config is empty - if config == CONFIG_DEFAULT: - assert input_ids_list == [ - [101, 4057, 23914, 2003, 1996, 3284, 4672, 1999, 1996, 2088, 1012, 102], - [101, 5650, 7459, 3752, 2808, 1012, 3960, 15646, 2652, 4715, 1012, 102], - ] - assert attention_mask_list == [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - assert labels_list == [ - [-100, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, -100], - [-100, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, -100], - ] - assert 
special_tokens_mask_list == [ - [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - ] - - # If config has the specified values (max_window=8, window_overlap=2) - elif config == CONFIG_MAX_WINDOW_WITH_STRIDE: - assert input_ids_list == [ - [101, 4057, 23914, 2003, 1996, 3284, 4672, 102], - [101, 3284, 4672, 1999, 1996, 2088, 1012, 102], - [101, 5650, 7459, 3752, 2808, 1012, 3960, 102], - [101, 1012, 3960, 15646, 2652, 4715, 1012, 102], - ] - assert attention_mask_list == [ - [1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1], - ] - assert labels_list == [ - [-100, 1, 2, 0, 0, 0, 0, -100], - [-100, 0, 0, 0, 0, 0, 0, -100], - [-100, 3, 0, 0, 0, 0, 3, -100], - [-100, 0, 3, 0, 0, 0, 0, -100], - ] - - # If config has the specified values (max_window=8) - elif config == CONFIG_MAX_WINDOW: - assert input_ids_list == [ - [101, 4057, 23914, 2003, 1996, 3284, 4672, 102], - [101, 1999, 1996, 2088, 1012, 102, 0, 0], - [101, 5650, 7459, 3752, 2808, 1012, 3960, 102], - [101, 15646, 2652, 4715, 1012, 102, 0, 0], - ] - assert attention_mask_list == [ - [1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 0, 0], - ] - assert labels_list == [ - [-100, 1, 2, 0, 0, 0, 0, -100], - [-100, 0, 0, 0, 0, -100, -100, -100], - [-100, 3, 0, 0, 0, 0, 3, -100], - [-100, 0, 0, 0, 0, -100, -100, -100], - ] - assert special_tokens_mask_list == [ - [1, 0, 0, 0, 0, 0, 0, 1], - [1, 0, 0, 0, 0, 1, 1, 1], - [1, 0, 0, 0, 0, 0, 0, 1], - [1, 0, 0, 0, 0, 1, 1, 1], - ] - - # If config has the specified value (partition_annotation=sentences) - elif config == CONFIG_PARTITIONS: - assert input_ids_list == [[101, 3960, 15646, 2652, 4715, 1012, 102]] - assert attention_mask_list == [[1, 1, 1, 1, 1, 1, 1]] - assert labels_list == [[-100, 3, 0, 0, 0, 0, -100]] - assert special_tokens_mask_list == [[1, 0, 0, 0, 0, 0, 1]] - - else: - raise ValueError(f"unknown config: {config}") - - 
inputs_expected = BatchEncoding( - data={ - "input_ids": torch.tensor(input_ids_list, dtype=torch.int64), - "attention_mask": torch.tensor(attention_mask_list, dtype=torch.int64), - "special_tokens_mask": torch.tensor(special_tokens_mask_list, dtype=torch.int64), - } - ) - assert set(inputs.data) == set(inputs_expected.data) - labels_expected = torch.tensor(labels_list, dtype=torch.int64) - assert torch.equal(targets["labels"], labels_expected) - - -# This is not used, but can be used to create a batch of task encodings with targets for the unbatched_outputs fixture. -@pytest.fixture(scope="module") -def real_model_output(batch, taskmodule): - from pytorch_ie.models import TransformerTokenClassificationModel - - model = TransformerTokenClassificationModel( - model_name_or_path="prajjwal1/bert-tiny", - num_classes=len(taskmodule.label_to_id), - ) - inputs, targets = batch - result = model(inputs) - return result - - -@pytest.fixture(scope="module") -def model_output(config, batch, taskmodule) -> ModelOutputType: - # create "perfect" output from targets - labels = batch[1]["labels"] - num_classes = len(taskmodule.label_to_id) - # create one-hot encoding from labels - labels_valid = labels.clone() - labels_valid[labels_valid == taskmodule.label_pad_id] = taskmodule.label_to_id["O"] - # create one-hot encoding from labels, but with 0.9 for the correct labels - probabilities = ( - torch.nn.functional.one_hot(labels_valid, num_classes=num_classes).to(torch.float32) * 0.9 - ) - return {"labels": labels, "probabilities": probabilities} - - -@pytest.fixture(scope="module") -def unbatched_outputs(taskmodule, model_output): - return taskmodule.unbatch_output(model_output) - - -@pytest.mark.parametrize("combine_token_scores_method", ["mean", "max", "product", "UNKNOWN"]) -def test_combine_token_scores_method(documents, combine_token_scores_method): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", - 
span_annotation="entities", - combine_token_scores_method=combine_token_scores_method, - ) - taskmodule.prepare(documents) - - task_encodings = taskmodule.encode(documents, encode_target=True) - batch = taskmodule.collate(task_encodings) - - # create "perfect" output from targets - labels = batch[1]["labels"] - num_classes = len(taskmodule.label_to_id) - # create one-hot encoding from labels - labels_valid = labels.clone() - labels_valid[labels_valid == taskmodule.label_pad_id] = taskmodule.label_to_id["O"] - # create one-hot encoding from labels, but with 0.9 for the correct labels - probabilities = ( - torch.nn.functional.one_hot(labels_valid, num_classes=num_classes).to(torch.float32) * 0.9 - ) - # stepwise decrease the "winning" probabilities per token to test the different combine_token_scores_methods - diff = 0.0 - for i in range(probabilities.size(1)): - probabilities[:, i] -= diff - diff += 0.01 - probabilities[probabilities < 0] = 0.0 - - model_output = {"labels": labels, "probabilities": probabilities} - - unbatched_outputs = taskmodule.unbatch_output(model_output) - - if combine_token_scores_method == "UNKNOWN": - with pytest.raises(ValueError) as excinfo: - taskmodule.decode_annotations(unbatched_outputs[0]) - assert str(excinfo.value) == "combine_token_scores_method=UNKNOWN is not supported." 
- else: - annotations = [] - scores = [] - for unbatched_output in unbatched_outputs: - decoded_annotations = taskmodule.decode_annotations(unbatched_output) - assert set(decoded_annotations.keys()) == {"labeled_spans"} - # Sort the annotations in each document by start and end position and label - sorted_annotations = sorted(decoded_annotations["labeled_spans"]) - annotations.append(sorted_annotations) - scores.append([round(ann.score, 5) for ann in sorted_annotations]) - - # input values are (before combination): [[0.89, 0.88], [[0.89], [0.84]]] - if combine_token_scores_method == "mean": - assert scores == [[(0.89 + 0.88) / 2], [0.89, 0.84]] - elif combine_token_scores_method == "max": - assert scores == [[0.89], [0.89, 0.84]] - elif combine_token_scores_method == "min": - assert scores == [[0.88], [0.89, 0.84]] - elif combine_token_scores_method == "product": - assert scores == [[0.89 * 0.88], [0.89, 0.84]] - else: - raise ValueError(f"unknown combine_token_scores_method: {combine_token_scores_method}") - - -def test_unbatched_output(unbatched_outputs, config): - assert unbatched_outputs is not None - - if config == CONFIG_DEFAULT: - assert len(unbatched_outputs) == 2 - torch.testing.assert_close( - unbatched_outputs[0]["labels"], - torch.tensor([-100, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, -100]), - ) - torch.testing.assert_close( - unbatched_outputs[1]["labels"], - torch.tensor([-100, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, -100]), - ) - elif config == CONFIG_MAX_WINDOW_WITH_STRIDE: - assert len(unbatched_outputs) == 4 - torch.testing.assert_close( - unbatched_outputs[0]["labels"], torch.tensor([-100, 1, 2, 0, 0, 0, 0, -100]) - ) - torch.testing.assert_close( - unbatched_outputs[1]["labels"], torch.tensor([-100, 0, 0, 0, 0, 0, 0, -100]) - ) - torch.testing.assert_close( - unbatched_outputs[2]["labels"], torch.tensor([-100, 3, 0, 0, 0, 0, 3, -100]) - ) - torch.testing.assert_close( - unbatched_outputs[3]["labels"], torch.tensor([-100, 0, 3, 0, 0, 0, 0, -100]) - ) - elif config 
== CONFIG_MAX_WINDOW: - assert len(unbatched_outputs) == 4 - torch.testing.assert_close( - unbatched_outputs[0]["labels"], torch.tensor([-100, 1, 2, 0, 0, 0, 0, -100]) - ) - torch.testing.assert_close( - unbatched_outputs[1]["labels"], torch.tensor([-100, 0, 0, 0, 0, -100, -100, -100]) - ) - torch.testing.assert_close( - unbatched_outputs[2]["labels"], torch.tensor([-100, 3, 0, 0, 0, 0, 3, -100]) - ) - torch.testing.assert_close( - unbatched_outputs[3]["labels"], torch.tensor([-100, 0, 0, 0, 0, -100, -100, -100]) - ) - elif config == CONFIG_PARTITIONS: - assert len(unbatched_outputs) == 1 - torch.testing.assert_close( - unbatched_outputs[0]["labels"], torch.tensor([-100, 3, 0, 0, 0, 0, -100]) - ) - else: - raise ValueError(f"unknown config: {config}") - - -def test_decode_annotations(taskmodule, unbatched_outputs, config): - annotations = [] - for unbatched_output in unbatched_outputs: - decoded_annotations = taskmodule.decode_annotations(unbatched_output) - assert set(decoded_annotations.keys()) == {"labeled_spans"} - # Sort the annotations in each document by start and end position and label - annotations.append( - sorted( - decoded_annotations["labeled_spans"], - key=lambda labeled_span: ( - labeled_span.start, - labeled_span.end, - labeled_span.label, - ), - ) - ) - - # Check based on the config - if config == CONFIG_DEFAULT: - assert annotations == [ - [LabeledSpan(start=1, end=3, label="LOC")], - [ - LabeledSpan(start=1, end=2, label="PER"), - LabeledSpan(start=6, end=7, label="PER"), - ], - ] - - elif config == CONFIG_MAX_WINDOW_WITH_STRIDE: - # We get two annotations for Bob because the window overlaps with the previous one. - # This is not a problem because annotations get de-duplicated during serialization. 
- assert annotations == [ - [LabeledSpan(start=1, end=3, label="LOC")], - [], - [ - LabeledSpan(start=1, end=2, label="PER"), - LabeledSpan(start=6, end=7, label="PER"), - ], - [LabeledSpan(start=2, end=3, label="PER")], - ] - - elif config == CONFIG_MAX_WINDOW: - assert annotations == [ - [LabeledSpan(start=1, end=3, label="LOC")], - [], - [ - LabeledSpan(start=1, end=2, label="PER"), - LabeledSpan(start=6, end=7, label="PER"), - ], - [], - ] - - elif config == CONFIG_PARTITIONS: - assert annotations == [[LabeledSpan(start=1, end=2, label="PER", score=1.0)]] - - else: - raise ValueError(f"unknown config: {config}") - - # assert that all scores are 0.9 - for doc_annotations in annotations: - for annotation in doc_annotations: - assert round(annotation.score, 4) == 0.9 - - -@pytest.fixture(scope="module") -def annotations_from_output(taskmodule, task_encodings_for_batch, unbatched_outputs, config): - named_annotations_per_document = defaultdict(list) - for task_encoding, task_output in zip(task_encodings_for_batch, unbatched_outputs): - annotations = taskmodule.create_annotations_from_output(task_encoding, task_output) - named_annotations_per_document[task_encoding.document.id].extend(list(annotations)) - return named_annotations_per_document - - -def test_annotations_from_output(annotations_from_output, config, documents): - assert annotations_from_output is not None - # Sort the annotations in each document by start and end positions - annotations_from_output = { - doc_id: sorted(annotations, key=lambda x: (x[0], x[1].start, x[1].end)) - for doc_id, annotations in annotations_from_output.items() - } - documents_by_id = {doc.id: doc for doc in documents} - documents_with_annotations = [] - resolved_annotations = defaultdict(list) - # Check that the number of annotations is correct - for doc_id, layer_names_and_annotations in annotations_from_output.items(): - new_doc = documents_by_id[doc_id].copy() - for layer_name, annotation in layer_names_and_annotations: - 
assert layer_name == "entities" - assert isinstance(annotation, LabeledSpan) - new_doc.entities.predictions.append(annotation) - resolved_annotations[doc_id].append(str(annotation)) - documents_with_annotations.append(new_doc) - - resolved_annotations = dict(resolved_annotations) - # Check based on the config - if config == CONFIG_DEFAULT: - assert resolved_annotations == {"doc1": ["Mount Everest"], "doc2": ["Alice", "Bob"]} - - elif config == CONFIG_MAX_WINDOW_WITH_STRIDE: - # We get two annotations for Bob because the window overlaps with the previous one. - # This is not a problem because annotations get de-duplicated during serialization. - assert resolved_annotations == {"doc1": ["Mount Everest"], "doc2": ["Alice", "Bob", "Bob"]} - - elif config == CONFIG_MAX_WINDOW: - assert resolved_annotations == {"doc1": ["Mount Everest"], "doc2": ["Alice", "Bob"]} - - elif config == CONFIG_PARTITIONS: - assert resolved_annotations == {"doc2": ["Bob"]} - - else: - raise ValueError(f"unknown config: {config}") - - -def test_document_type(): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased" - ) - assert taskmodule.document_type == TextDocumentWithLabeledSpans - - -def test_document_type_with_partitions(): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", partition_annotation="labeled_partitions" - ) - assert taskmodule.document_type == TextDocumentWithLabeledSpansAndLabeledPartitions - - -def test_document_type_with_non_default_span_annotation(caplog): - with caplog.at_level(logging.WARNING): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", span_annotation="entities" - ) - assert taskmodule.document_type is None - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == "WARNING" - assert ( - caplog.records[0].message - == "span_annotation=entities is not the default 
value ('labeled_spans'), so the taskmodule " - "LabeledSpanExtractionByTokenClassificationTaskModule can not request the usual document type " - "(TextDocumentWithLabeledSpans) for auto-conversion because this has the bespoken default value " - "as layer name(s) instead of the provided one(s)." - ) - - -def test_document_type_with_non_default_partition_annotation(caplog): - with caplog.at_level(logging.WARNING): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", partition_annotation="sentences" - ) - assert taskmodule.document_type is None - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == "WARNING" - assert ( - caplog.records[0].message - == "partition_annotation=sentences is not the default value ('labeled_partitions'), " - "so the taskmodule LabeledSpanExtractionByTokenClassificationTaskModule can not request the usual document type " - "(TextDocumentWithLabeledSpansAndLabeledPartitions) for auto-conversion because this has " - "the bespoken default value as layer name(s) instead of the provided one(s)." 
- ) - - -def test_document_type_with_non_default_span_and_partition_annotation(caplog): - with caplog.at_level(logging.WARNING): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", - span_annotation="entities", - partition_annotation="sentences", - ) - assert taskmodule.document_type is None - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == "WARNING" - assert ( - caplog.records[0].message - == "span_annotation=entities is not the default value ('labeled_spans') and " - "partition_annotation=sentences is not the default value ('labeled_partitions'), " - "so the taskmodule LabeledSpanExtractionByTokenClassificationTaskModule can not request the usual document " - "type (TextDocumentWithLabeledSpansAndLabeledPartitions) for auto-conversion because " - "this has the bespoken default value as layer name(s) instead of the provided one(s)." - ) - - -def test_configure_model_metric(documents): - taskmodule = LabeledSpanExtractionByTokenClassificationTaskModule( - tokenizer_name_or_path="bert-base-uncased", - span_annotation="entities", - labels=["LOC", "PER"], - ) - taskmodule.post_prepare() - - metric = taskmodule.configure_model_metric(stage="test") - values = metric.compute() - assert values == { - "token/macro/f1": tensor(0.0), - "token/micro/f1": tensor(0.0), - "token/macro/precision": tensor(0.0), - "token/macro/recall": tensor(0.0), - "token/micro/precision": tensor(0.0), - "token/micro/recall": tensor(0.0), - } - - batch = taskmodule.collate(taskmodule.encode(documents, encode_target=True)) - targets = batch[1] - metric.update(targets, targets) - values = metric.compute() - assert values == { - "span/LOC/f1": tensor(1.0), - "span/LOC/precision": tensor(1.0), - "span/LOC/recall": tensor(1.0), - "span/PER/f1": tensor(1.0), - "span/PER/precision": tensor(1.0), - "span/PER/recall": tensor(1.0), - "span/macro/f1": tensor(1.0), - "span/macro/precision": tensor(1.0), - 
"span/macro/recall": tensor(1.0), - "span/micro/f1": tensor(1.0), - "span/micro/precision": tensor(1.0), - "span/micro/recall": tensor(1.0), - "token/macro/f1": tensor(1.0), - "token/micro/f1": tensor(1.0), - "token/macro/precision": tensor(1.0), - "token/macro/recall": tensor(1.0), - "token/micro/precision": tensor(1.0), - "token/micro/recall": tensor(1.0), - } - - target_labels = targets["labels"] - predicted_labels = torch.ones_like(target_labels) - # we need to set the same padding as in the targets - predicted_labels[target_labels == taskmodule.label_pad_id] = taskmodule.label_pad_id - prediction = {"labels": predicted_labels} - metric.update(prediction, targets) - values = metric.compute() - values_converted = {k: v.item() for k, v in values.items()} - assert values_converted == { - "token/macro/f1": 0.5434783101081848, - "token/micro/f1": 0.5249999761581421, - "token/macro/precision": 0.773809552192688, - "token/macro/recall": 0.625, - "token/micro/precision": 0.5249999761581421, - "token/micro/recall": 0.5249999761581421, - "span/LOC/recall": 0.0476190485060215, - "span/LOC/precision": 0.5, - "span/LOC/f1": 0.08695652335882187, - "span/macro/f1": 0.37681159377098083, - "span/macro/precision": 0.5, - "span/macro/recall": 0.523809552192688, - "span/micro/recall": 0.1304347813129425, - "span/micro/precision": 0.5, - "span/micro/f1": 0.2068965584039688, - "span/PER/recall": 1.0, - "span/PER/precision": 0.5, - "span/PER/f1": 0.6666666865348816, - } - - # ensure that the metric can be pickled - pickle.dumps(metric) diff --git a/tests/taskmodules/test_pointer_network_for_end2end_re.py b/tests/taskmodules/test_pointer_network_for_end2end_re.py deleted file mode 100644 index 5251b8ba8..000000000 --- a/tests/taskmodules/test_pointer_network_for_end2end_re.py +++ /dev/null @@ -1,1313 +0,0 @@ -import logging -import pickle -from dataclasses import asdict, dataclass -from typing import Dict, List, Set - -import pytest -import torch -from pie_core import AnnotationLayer, 
Document, annotation_field -from transformers import LogitsProcessorList - -from pie_modules.annotations import BinaryRelation, LabeledSpan -from pie_modules.documents import TextBasedDocument -from pie_modules.taskmodules import PointerNetworkTaskModuleForEnd2EndRE -from pie_modules.taskmodules.pointer_network.logits_processor import ( - FinitizeLogitsProcessor, - PrefixConstrainedLogitsProcessorWithMaximum, -) -from pie_modules.taskmodules.pointer_network_for_end2end_re import ( - LabelsAndOptionalConstraints, -) - -logger = logging.getLogger(__name__) - -DUMP_FIXTURE_DATA = False - - -def _config_to_str(cfg: Dict[str, str]) -> str: - result = "-".join([f"{k}={cfg[k]}" for k in sorted(cfg)]) - return result - - -CONFIGS = [{}, {"partition_layer_name": "sentences"}] -CONFIG_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIG_DICT.keys()) -def config_str(request): - return request.param - - -@pytest.fixture(scope="module") -def config(config_str): - return CONFIG_DICT[config_str] - - -@dataclass -class ExampleDocument(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - sentences: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - - -@pytest.fixture(scope="module") -def document(): - doc = ExampleDocument(text="This is a dummy text about nothing. 
Trust me.") - span1 = LabeledSpan(start=10, end=20, label="content") - span2 = LabeledSpan(start=27, end=34, label="topic") - span3 = LabeledSpan(start=42, end=44, label="person") - doc.entities.extend([span1, span2, span3]) - assert str(span1) == "dummy text" - assert str(span2) == "nothing" - assert str(span3) == "me" - rel = BinaryRelation(head=span1, tail=span2, label="is_about") - doc.relations.append(rel) - assert str(rel.label) == "is_about" - assert str(rel.head) == "dummy text" - assert str(rel.tail) == "nothing" - - no_rel = BinaryRelation(head=span1, tail=span3, label="no_relation") - doc.relations.append(no_rel) - assert str(no_rel.label) == "no_relation" - assert str(no_rel.head) == "dummy text" - assert str(no_rel.tail) == "me" - - sent1 = LabeledSpan(start=0, end=35, label="1") - sent2 = LabeledSpan(start=36, end=45, label="2") - doc.sentences.extend([sent1, sent2]) - assert str(sent1) == "This is a dummy text about nothing." - assert str(sent2) == "Trust me." - return doc - - -def test_document(document): - spans = document.entities - assert len(spans) == 3 - assert (str(spans[0]), spans[0].label) == ("dummy text", "content") - assert (str(spans[1]), spans[1].label) == ("nothing", "topic") - assert (str(spans[2]), spans[2].label) == ("me", "person") - relations = document.relations - assert len(relations) == 2 - assert (str(relations[0].head), relations[0].label, str(relations[0].tail)) == ( - "dummy text", - "is_about", - "nothing", - ) - assert (str(relations[1].head), relations[1].label, str(relations[1].tail)) == ( - "dummy text", - "no_relation", - "me", - ) - sentences = document.sentences - assert len(sentences) == 2 - assert str(sentences[0]) == "This is a dummy text about nothing." - assert str(sentences[1]) == "Trust me." 
- - -@pytest.fixture(scope="module") -def taskmodule(document, config): - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - exclude_labels_per_layer={"relations": ["no_relation"]}, - annotation_field_mapping={ - "entities": "labeled_spans", - "relations": "binary_relations", - }, - create_constraints=True, - tokenizer_kwargs={"strict_span_conversion": False}, - **config, - ) - - taskmodule.prepare(documents=[document]) - return taskmodule - - -def test_taskmodule(taskmodule): - assert taskmodule.is_prepared - assert taskmodule.prepared_attributes == { - "labels_per_layer": { - "entities": ["content", "person", "topic"], - "relations": ["is_about"], - }, - } - assert taskmodule.layer_names == ["entities", "relations"] - assert taskmodule.special_targets == ["", ""] - assert taskmodule.labels == ["none", "content", "person", "topic", "is_about"] - assert taskmodule.targets == [ - "", - "", - "none", - "content", - "person", - "topic", - "is_about", - ] - assert taskmodule.bos_id == 0 - assert taskmodule.eos_id == 1 - assert taskmodule.none_id == 2 - assert taskmodule.span_ids == [3, 4, 5] - assert taskmodule.relation_ids == [6] - assert taskmodule.label2id == { - "content": 3, - "is_about": 6, - "none": 2, - "person": 4, - "topic": 5, - } - assert taskmodule.label_embedding_weight_mapping == { - 50265: [45260], - 50266: [39763], - 50267: [354, 1215, 9006], - 50268: [5970], - 50269: [10166], - } - assert taskmodule.target_tokens == [ - "", - "", - "<>", - "<>", - "<>", - "<>", - "<>", - ] - assert taskmodule.target_token_ids == [0, 2, 50266, 50269, 50268, 50265, 50267] - - -def test_taskmodule_with_wrong_annotation_field_mapping(): - with pytest.raises(ValueError) as exc_info: - PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - annotation_field_mapping={ - "entities": "labeled_spans", - "sentences": 
"labeled_spans", - }, - ) - assert str(exc_info.value) == ( - "inverted annotation_field_mapping is not unique. annotation_field_mapping: " - "{'entities': 'labeled_spans', 'sentences': 'labeled_spans'}" - ) - - -def test_prepared_config(taskmodule, config): - if config == {}: - assert taskmodule._config() == { - "taskmodule_type": "PointerNetworkTaskModuleForEnd2EndRE", - "relation_layer_name": "relations", - "symmetric_relations": None, - "none_label": "none", - "loop_dummy_relation_name": "loop", - "labels_per_layer": { - "entities": ["content", "person", "topic"], - "relations": ["is_about"], - }, - "exclude_labels_per_layer": {"relations": ["no_relation"]}, - "create_constraints": True, - "document_type": "pytorch_ie.documents.TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions", - "tokenized_document_type": "pie_modules.documents.TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions", - "tokenizer_name_or_path": "facebook/bart-base", - "tokenizer_init_kwargs": None, - "tokenizer_kwargs": {"strict_span_conversion": False}, - "partition_layer_name": None, - "add_reversed_relations": False, - "annotation_field_mapping": { - "entities": "labeled_spans", - "relations": "binary_relations", - }, - "constrained_generation": False, - "label_tokens": None, - "label_representations": None, - "log_first_n_examples": None, - } - elif config == {"partition_layer_name": "sentences"}: - assert taskmodule._config() == { - "taskmodule_type": "PointerNetworkTaskModuleForEnd2EndRE", - "relation_layer_name": "relations", - "symmetric_relations": None, - "none_label": "none", - "loop_dummy_relation_name": "loop", - "labels_per_layer": { - "entities": ["content", "person", "topic"], - "relations": ["is_about"], - }, - "exclude_labels_per_layer": {"relations": ["no_relation"]}, - "create_constraints": True, - "document_type": "pytorch_ie.documents.TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions", - "tokenized_document_type": 
"pie_modules.documents.TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions", - "tokenizer_name_or_path": "facebook/bart-base", - "tokenizer_init_kwargs": None, - "tokenizer_kwargs": {"strict_span_conversion": False}, - "partition_layer_name": "sentences", - "add_reversed_relations": False, - "annotation_field_mapping": { - "entities": "labeled_spans", - "relations": "binary_relations", - }, - "constrained_generation": False, - "label_tokens": None, - "label_representations": None, - "log_first_n_examples": None, - } - else: - raise Exception(f"unknown config: {config}") - - -@pytest.fixture() -def task_encoding_without_target(taskmodule, document): - return taskmodule.encode_input(document)[0] - - -def test_add_reversed_relation_labels(): - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - symmetric_relations=["symmetric_relation"], - ) - - labels = ["is_about", "symmetric_relation"] - labels_with_reversed = taskmodule.add_reversed_relation_labels(labels) - assert labels_with_reversed == {"is_about", "is_about_reversed", "symmetric_relation"} - - -def test_reverse_relation(): - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - symmetric_relations=["symmetric_relation"], - ) - - rel = BinaryRelation( - head=LabeledSpan(start=10, end=20, label="content"), - tail=LabeledSpan(start=27, end=34, label="topic"), - label="is_about", - ) - reversed_relation = taskmodule.reverse_relation(relation=rel) - assert reversed_relation == BinaryRelation( - head=LabeledSpan(start=27, end=34, label="topic", score=1.0), - tail=LabeledSpan(start=10, end=20, label="content", score=1.0), - label="is_about_reversed", - score=1.0, - ) - - sym_rel = BinaryRelation( - head=LabeledSpan(start=10, end=20, label="content"), - tail=LabeledSpan(start=27, end=34, label="topic"), - label="symmetric_relation", - ) - reversed_sym_rel = taskmodule.reverse_relation(relation=sym_rel) - assert 
reversed_sym_rel == BinaryRelation( - head=LabeledSpan(start=27, end=34, label="topic", score=1.0), - tail=LabeledSpan(start=10, end=20, label="content", score=1.0), - label="symmetric_relation", - score=1.0, - ) - - -def test_unreverse_relation(): - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - symmetric_relations=["symmetric_relation"], - ) - - # nothing should change because the relation is not reversed - rel = BinaryRelation( - head=LabeledSpan(start=10, end=20, label="content"), - tail=LabeledSpan(start=27, end=34, label="topic"), - label="is_about", - ) - same_rel = taskmodule.unreverse_relation(relation=rel) - assert same_rel == rel - - # the relation is reversed, so it should be un-reversed - reversed_rel = BinaryRelation( - head=LabeledSpan(start=10, end=20, label="content"), - tail=LabeledSpan(start=27, end=34, label="topic"), - label="is_about_reversed", - ) - unreversed_relation = taskmodule.unreverse_relation(relation=reversed_rel) - assert unreversed_relation == BinaryRelation( - head=LabeledSpan(start=27, end=34, label="topic", score=1.0), - tail=LabeledSpan(start=10, end=20, label="content", score=1.0), - label="is_about", - score=1.0, - ) - - # nothing should change because the relation is symmetric and already ordered (head < tail) - ordered_sym_rel = BinaryRelation( - head=LabeledSpan(start=10, end=20, label="content"), - tail=LabeledSpan(start=27, end=34, label="topic"), - label="symmetric_relation", - ) - unreversed_ordered_sym_rel = taskmodule.unreverse_relation(relation=ordered_sym_rel) - assert ordered_sym_rel == unreversed_ordered_sym_rel - - # the relation is symmetric and unordered (head > tail), so it should be un-reversed - unordered_sym_rel = BinaryRelation( - head=LabeledSpan(start=27, end=34, label="topic"), - tail=LabeledSpan(start=10, end=20, label="content"), - label="symmetric_relation", - ) - unreversed_unordered_sym_rel = 
taskmodule.unreverse_relation(relation=unordered_sym_rel) - assert unreversed_unordered_sym_rel == BinaryRelation( - head=LabeledSpan(start=10, end=20, label="content", score=1.0), - tail=LabeledSpan(start=27, end=34, label="topic", score=1.0), - label="symmetric_relation", - score=1.0, - ) - - -@pytest.fixture(params=[False, True]) -def taskmodule_with_reversed_relations(document, request) -> PointerNetworkTaskModuleForEnd2EndRE: - is_about_is_symmetric = request.param - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - exclude_labels_per_layer={"relations": ["no_relation"]}, - annotation_field_mapping={ - "entities": "labeled_spans", - "relations": "binary_relations", - }, - create_constraints=True, - tokenizer_kwargs={"strict_span_conversion": False}, - add_reversed_relations=True, - symmetric_relations=["is_about"] if is_about_is_symmetric else None, - ) - - taskmodule.prepare(documents=[document]) - assert taskmodule.is_prepared - if is_about_is_symmetric: - assert taskmodule.prepared_attributes == { - "labels_per_layer": { - "entities": ["content", "person", "topic"], - "relations": ["is_about"], - } - } - else: - assert taskmodule.prepared_attributes == { - "labels_per_layer": { - "entities": ["content", "person", "topic"], - "relations": ["is_about", "is_about_reversed"], - } - } - - return taskmodule - - -def test_encode_with_add_reversed_relations(taskmodule_with_reversed_relations, document): - task_encodings = taskmodule_with_reversed_relations.encode(document, encode_target=True) - assert len(task_encodings) == 1 - task_encoding = task_encodings[0] - assert task_encoding is not None - assert asdict(task_encoding.inputs) == { - "input_ids": [0, 713, 16, 10, 34759, 2788, 59, 1085, 4, 3101, 162, 4, 2], - "attention_mask": [1] * 13, - } - tokens = taskmodule_with_reversed_relations.tokenizer.convert_ids_to_tokens( - task_encoding.inputs.input_ids - ) - assert tokens == [ - 
"", - "This", - "Ä is", - "Ä a", - "Ä dummy", - "Ä text", - "Ä about", - "Ä nothing", - ".", - "Ä Trust", - "Ä me", - ".", - "", - ] - if "is_about" in taskmodule_with_reversed_relations.symmetric_relations: - decoded_annotations, statistics = taskmodule_with_reversed_relations.decode_annotations( - task_encoding.targets - ) - assert decoded_annotations == { - "entities": [ - LabeledSpan(start=4, end=6, label="content", score=1.0), - LabeledSpan(start=7, end=8, label="topic", score=1.0), - LabeledSpan(start=10, end=11, label="person", score=1.0), - ], - "relations": [ - BinaryRelation( - head=LabeledSpan(start=4, end=6, label="content", score=1.0), - tail=LabeledSpan(start=7, end=8, label="topic", score=1.0), - label="is_about", - score=1.0, - ), - BinaryRelation( - head=LabeledSpan(start=7, end=8, label="topic", score=1.0), - tail=LabeledSpan(start=4, end=6, label="content", score=1.0), - label="is_about", - score=1.0, - ), - ], - } - else: - decoded_annotations, statistics = taskmodule_with_reversed_relations.decode_annotations( - task_encoding.targets - ) - assert decoded_annotations == { - "entities": [ - LabeledSpan(start=4, end=6, label="content", score=1.0), - LabeledSpan(start=7, end=8, label="topic", score=1.0), - LabeledSpan(start=10, end=11, label="person", score=1.0), - ], - "relations": [ - BinaryRelation( - head=LabeledSpan(start=4, end=6, label="content", score=1.0), - tail=LabeledSpan(start=7, end=8, label="topic", score=1.0), - label="is_about", - score=1.0, - ), - BinaryRelation( - head=LabeledSpan(start=7, end=8, label="topic", score=1.0), - tail=LabeledSpan(start=4, end=6, label="content", score=1.0), - label="is_about_reversed", - score=1.0, - ), - ], - } - - -def test_encode_with_add_reversed_relations_already_exists(caplog): - doc = ExampleDocument(text="This is a dummy text about nothing. 
Trust me.") - doc.entities.append(LabeledSpan(start=10, end=20, label="content")) - doc.entities.append(LabeledSpan(start=27, end=34, label="topic")) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="is_about") - ) - doc.relations.append( - BinaryRelation(head=doc.entities[1], tail=doc.entities[0], label="is_about") - ) - - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - annotation_field_mapping={ - "entities": "labeled_spans", - "relations": "binary_relations", - }, - add_reversed_relations=True, - symmetric_relations=["is_about"], - ) - taskmodule.prepare(documents=[doc]) - - with caplog.at_level(logging.WARNING): - task_encodings = taskmodule.encode(doc, encode_target=True) - assert len(caplog.messages) == 0 - assert len(task_encodings) == 1 - task_encoding = task_encodings[0] - - decoded_annotations, statistics = taskmodule.decode_annotations(task_encoding.targets) - assert decoded_annotations == { - "entities": [ - LabeledSpan(start=4, end=6, label="content", score=1.0), - LabeledSpan(start=7, end=8, label="topic", score=1.0), - ], - "relations": [ - BinaryRelation( - head=LabeledSpan(start=4, end=6, label="content", score=1.0), - tail=LabeledSpan(start=7, end=8, label="topic", score=1.0), - label="is_about", - score=1.0, - ), - BinaryRelation( - head=LabeledSpan(start=7, end=8, label="topic", score=1.0), - tail=LabeledSpan(start=4, end=6, label="content", score=1.0), - label="is_about", - score=1.0, - ), - ], - } - - -def test_decode_with_add_reversed_relations(): - doc = ExampleDocument(text="This is a dummy text about nothing. 
Trust me.") - doc.entities.append(LabeledSpan(start=10, end=20, label="content")) - doc.entities.append(LabeledSpan(start=27, end=34, label="topic")) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="is_about") - ) - - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - annotation_field_mapping={ - "entities": "labeled_spans", - "relations": "binary_relations", - }, - add_reversed_relations=True, - ) - taskmodule.prepare(documents=[doc]) - - task_encodings = taskmodule.encode(doc, encode_target=True) - assert len(task_encodings) == 1 - decoded_annotations, statistics = taskmodule.decode_annotations(task_encodings[0].targets) - assert decoded_annotations == { - "entities": [ - LabeledSpan(start=4, end=6, label="content", score=1.0), - LabeledSpan(start=7, end=8, label="topic", score=1.0), - ], - "relations": [ - BinaryRelation( - head=LabeledSpan(start=4, end=6, label="content", score=1.0), - tail=LabeledSpan(start=7, end=8, label="topic", score=1.0), - label="is_about", - score=1.0, - ), - BinaryRelation( - head=LabeledSpan(start=7, end=8, label="topic", score=1.0), - tail=LabeledSpan(start=4, end=6, label="content", score=1.0), - label="is_about_reversed", - score=1.0, - ), - ], - } - - task_outputs = [task_encoding.targets for task_encoding in task_encodings] - docs_with_predictions = taskmodule.decode(task_encodings, task_outputs) - assert len(docs_with_predictions) == 1 - doc_with_predictions: ExampleDocument = docs_with_predictions[0] - assert set(doc_with_predictions.entities.predictions) == set(doc_with_predictions.entities) - assert set(doc_with_predictions.relations.predictions) == set(doc_with_predictions.relations) - - -@pytest.fixture() -def target_encoding(taskmodule, task_encoding_without_target): - return taskmodule.encode_target(task_encoding_without_target) - - -def test_target_encoding(target_encoding, taskmodule): - assert 
target_encoding is not None - if taskmodule.partition_layer_name is None: - assert asdict(target_encoding) == { - "labels": [14, 14, 5, 11, 12, 3, 6, 17, 17, 4, 2, 2, 2, 2, 1], - "constraints": [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ], - } - elif taskmodule.partition_layer_name == "sentences": - assert asdict(target_encoding) == { - "labels": [14, 14, 5, 11, 12, 3, 6, 1], - "constraints": [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ], - } - else: - raise Exception(f"unknown partition_layer_name: {taskmodule.partition_layer_name}") - - -def test_task_encoding_with_deduplicated_relations(caplog): - doc = ExampleDocument(text="This is a dummy text about nothing. 
Trust me.") - doc.entities.append(LabeledSpan(start=10, end=20, label="content")) - doc.entities.append(LabeledSpan(start=27, end=34, label="topic")) - doc.entities.append(LabeledSpan(start=42, end=44, label="person")) - assert doc.entities.resolve() == [ - ("content", "dummy text"), - ("topic", "nothing"), - ("person", "me"), - ] - # add the same relation twice (just use a different score, but that should not matter) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="is_about") - ) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="is_about", score=0.9) - ) - assert doc.relations.resolve() == [ - ("is_about", (("content", "dummy text"), ("topic", "nothing"))), - ("is_about", (("content", "dummy text"), ("topic", "nothing"))), - ] - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - annotation_field_mapping={ - "entities": "labeled_spans", - "relations": "binary_relations", - }, - ) - taskmodule.prepare(documents=[doc]) - caplog.clear() - with caplog.at_level(logging.WARNING): - task_encodings = taskmodule.encode(doc, encode_target=True) - messages = list(caplog.messages) - - assert len(task_encodings) == 1 - decoded_annotations, statistics = taskmodule.decode_annotations(task_encodings[0].targets) - assert decoded_annotations == { - "entities": [ - LabeledSpan(start=4, end=6, label="content", score=1.0), - LabeledSpan(start=7, end=8, label="topic", score=1.0), - LabeledSpan(start=10, end=11, label="person", score=1.0), - ], - "relations": [ - BinaryRelation( - head=LabeledSpan(start=4, end=6, label="content", score=1.0), - tail=LabeledSpan(start=7, end=8, label="topic", score=1.0), - label="is_about", - score=1.0, - ) - ], - } - - assert messages == [ - ( - "encoding errors: {'correct': 2}, skipped annotations:\n" - "{\n" - ' "relations": [\n' - ' "BinaryRelation(' - "head=LabeledSpan(start=4, end=6, 
label='content', score=1.0), " - "tail=LabeledSpan(start=7, end=8, label='topic', score=1.0), " - "label='is_about', score=0.9" - ')"\n' - " ]\n" - "}" - ) - ] - - -def test_task_encoding_with_conflicting_relations(caplog): - doc = ExampleDocument(text="This is a dummy text about nothing. Trust me.") - doc.entities.append(LabeledSpan(start=10, end=20, label="content")) - doc.entities.append(LabeledSpan(start=27, end=34, label="topic")) - doc.entities.append(LabeledSpan(start=42, end=44, label="person")) - assert doc.entities.resolve() == [ - ("content", "dummy text"), - ("topic", "nothing"), - ("person", "me"), - ] - # add two relations with the same head and tail, but different labels - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="is_about") - ) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="wrong_relation") - ) - assert doc.relations.resolve() == [ - ("is_about", (("content", "dummy text"), ("topic", "nothing"))), - ("wrong_relation", (("content", "dummy text"), ("topic", "nothing"))), - ] - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - relation_layer_name="relations", - annotation_field_mapping={ - "entities": "labeled_spans", - "relations": "binary_relations", - }, - ) - taskmodule.prepare(documents=[doc]) - caplog.clear() - with caplog.at_level(logging.ERROR): - task_encodings = taskmodule.encode(doc, encode_target=True) - messages = list(caplog.messages) - - assert len(task_encodings) == 0 - - assert messages == [ - "failed to encode target, it will be skipped: " - "relation ('Ä dummy', 'Ä text') -> ('Ä nothing',) already exists, but has " - "another label: is_about (current label: wrong_relation)." 
- ] - - -@pytest.fixture() -def task_encoding(task_encoding_without_target, target_encoding): - task_encoding_without_target.targets = target_encoding - return task_encoding_without_target - - -def _separate_constraint(constraint, taskmodule): - special_ids = sorted(taskmodule.special_target2id.values()) - none_ids = [taskmodule.none_id] - span_ids = taskmodule.span_ids - rel_ids = taskmodule.relation_ids - result = [[constraint[id] for id in ids] for ids in [special_ids, none_ids, span_ids, rel_ids]] - result += [constraint[taskmodule.pointer_offset :]] - assert sum(len(con_part) for con_part in result) == len(constraint) - return result - - -def test_build_constraint(taskmodule): - target_ids = [14, 14, 5, 11, 12, 3, 6, 17, 17, 4, 2, 2, 2, 2, 1] - input_len = 13 - - # empty previous_ids - constraint = taskmodule._build_constraint(previous_ids=[], input_len=input_len) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow eos and all offsets - assert constraint_formatted == [ - [0, 1], - [0], - [0, 0, 0], - [0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - - # just first span start - constraint = taskmodule._build_constraint(previous_ids=[14], input_len=input_len) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow all offsets after first span start - assert constraint_formatted == [ - [0, 0], - [0], - [0, 0, 0], - [0], - [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - ] - - # first span start and end - constraint = taskmodule._build_constraint(previous_ids=[14, 14], input_len=input_len) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow all span ids - assert constraint_formatted == [ - [0, 
0], - [0], - [1, 1, 1], - [0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - # first span start, end, and label - constraint = taskmodule._build_constraint(previous_ids=[14, 14, 5], input_len=input_len) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow none and all offsets except offsets covered by first span - assert constraint_formatted == [ - [0, 0], - [1], - [0, 0, 0], - [0], - [1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], - ] - - # first span, and second span start - constraint = taskmodule._build_constraint(previous_ids=[14, 14, 5, 11], input_len=input_len) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow all offsets after second span start, but not after first span start - assert constraint_formatted == [ - [0, 0], - [0], - [0, 0, 0], - [0], - [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], - ] - - # first span, and second span start and end - constraint = taskmodule._build_constraint( - previous_ids=[14, 14, 5, 11, 12], input_len=input_len - ) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow all span ids - assert constraint_formatted == [ - [0, 0], - [0], - [1, 1, 1], - [0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - # first span, and second span - constraint = taskmodule._build_constraint( - previous_ids=[14, 14, 5, 11, 12, 3], input_len=input_len - ) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow all relation ids - assert constraint_formatted == [ - [0, 0], - [0], - [0, 0, 0], - [1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - # fist 
span, and (1 to 3)-times none - for i in range(1, 3): - none_ids = [2] * i - constraint = taskmodule._build_constraint( - previous_ids=[14, 14, 5] + none_ids, input_len=input_len - ) - # [bos, eos], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow only none - assert constraint_formatted == [ - [0, 0], - [1], - [0, 0, 0], - [0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - # contains eos - constraint = taskmodule._build_constraint( - previous_ids=[14, 14, 5, 11, 12, 3, 6, 1], input_len=input_len - ) - # [bos, eos/pad], [none], [content, person, topic], [is_about] [13 offsets (all remaining)] - constraint_formatted = _separate_constraint(constraint.tolist(), taskmodule) - # allow only pad (same as eos) - assert constraint_formatted == [ - [0, 1], - [0], - [0, 0, 0], - [0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - - -def test_maybe_log_example(taskmodule, task_encoding, caplog, config): - original_log_first_n_examples = taskmodule.log_first_n_examples - taskmodule.log_first_n_examples = 1 - caplog.clear() - with caplog.at_level(logging.INFO): - taskmodule.maybe_log_example(task_encoding) - if config == {}: - assert caplog.messages == [ - "*** Example ***", - "doc.id: None-tokenized-1-of-1", - "input_ids: 0 713 16 10 34759 2788 59 1085 4 3101 162 4 2", - "input_tokens: This Ä is Ä a Ä dummy Ä text Ä about Ä nothing . Ä Trust Ä me . 
" "", - "label_ids: 14 14 5 11 12 3 6 17 17 4 2 2 2 2 1", - "label_tokens: 14 {Ä nothing} 14 {Ä nothing} topic 11 {Ä dummy} 12 {Ä text} content is_about 17 {Ä me} 17 " - "{Ä me} person none none none none ", - "constraints: torch.Size([15, 20]) (content is omitted)", - ] - elif config == {"partition_layer_name": "sentences"}: - assert caplog.messages == [ - "*** Example ***", - "doc.id: None-tokenized-1-of-2", - "input_ids: 0 713 16 10 34759 2788 59 1085 4 2", - "input_tokens: This Ä is Ä a Ä dummy Ä text Ä about Ä nothing . ", - "label_ids: 14 14 5 11 12 3 6 1", - "label_tokens: 14 {Ä nothing} 14 {Ä nothing} topic 11 {Ä dummy} 12 {Ä text} content is_about ", - "constraints: torch.Size([8, 17]) (content is omitted)", - ] - else: - raise Exception(f"unknown config: {config}") - - # restore original value - taskmodule.log_first_n_examples = original_log_first_n_examples - - -def test_maybe_log_example_disabled(taskmodule, task_encoding, caplog): - original_log_first_n_examples = taskmodule.log_first_n_examples - taskmodule.log_first_n_examples = None - caplog.clear() - with caplog.at_level(logging.INFO): - taskmodule.maybe_log_example(task_encoding) - assert caplog.record_tuples == [] - - # restore original value - taskmodule.log_first_n_examples = original_log_first_n_examples - - -@pytest.fixture() -def task_encodings(taskmodule, document): - return taskmodule.encode(documents=[document], encode_target=True) - - -@pytest.fixture() -def batch(taskmodule, task_encodings): - return taskmodule.collate(task_encodings) - - -def test_collate(batch, taskmodule): - inputs, targets = batch - for tensor in inputs.values(): - assert isinstance(tensor, torch.Tensor) - assert tensor.dtype == torch.int64 - for tensor in targets.values(): - assert isinstance(tensor, torch.Tensor) - assert tensor.dtype == torch.int64 - inputs_lists = {k: inputs[k].tolist() for k in sorted(inputs)} - targets_lists = {k: targets[k].tolist() for k in sorted(targets)} - if 
taskmodule.partition_layer_name is None: - assert inputs_lists == { - "input_ids": [[0, 713, 16, 10, 34759, 2788, 59, 1085, 4, 3101, 162, 4, 2]], - "attention_mask": [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], - } - assert targets_lists == { - "constraints": [ - [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ] - ], - "labels": [[14, 14, 5, 11, 12, 3, 6, 17, 17, 4, 2, 2, 2, 2, 1]], - "decoder_attention_mask": [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], - } - elif taskmodule.partition_layer_name == "sentences": - assert inputs_lists == { - "input_ids": [ - [0, 713, 16, 10, 34759, 2788, 59, 1085, 4, 2], - [0, 18823, 162, 4, 2, 1, 1, 1, 1, 1], - ], - "attention_mask": [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], - } - assert targets_lists == { - "constraints": [ - [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], - [0, 0, 
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ], - [ - [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, -1, -1, -1, -1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1], - ], - ], - "labels": [[14, 14, 5, 11, 12, 3, 6, 1], [9, 9, 4, 2, 2, 2, 2, 1]], - "decoder_attention_mask": [ - [1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1], - ], - } - else: - raise Exception(f"unknown partition_layer_name: {taskmodule.partition_layer_name}") - - -@pytest.fixture() -def unbatched_output(taskmodule, batch): - inputs, targets = batch - # because the model is trained to reproduce the target tokens, we can just use them as model prediction - return taskmodule.unbatch_output(targets) - - -@pytest.fixture() -def task_outputs(unbatched_output): - return unbatched_output - - -@pytest.fixture() -def task_output(task_outputs) -> LabelsAndOptionalConstraints: - return task_outputs[0] - - -def test_task_output(task_output, taskmodule): - output_list = task_output.labels - if taskmodule.partition_layer_name is None: - assert output_list == [14, 14, 5, 11, 12, 3, 6, 17, 17, 4, 2, 2, 2, 2, 1] - elif taskmodule.partition_layer_name == "sentences": - assert output_list == [14, 14, 5, 11, 12, 3, 6, 1] - else: - raise Exception(f"unknown partition_layer_name: {taskmodule.partition_layer_name}") - - -def _test_annotations_from_output(task_encodings, task_outputs, taskmodule, layer_names_expected): - assert len(task_outputs) == len(task_encodings) - - # this needs to be outside the below loop because documents can contain 
duplicates - # which would break the comparison when clearing predictions that were already added - for task_encoding in task_encodings: - for layer_name in layer_names_expected: - task_encoding.document[layer_name].predictions.clear() - - layer_names: Set[str] = set() - # Note: this list may contain duplicates! - documents: List[Document] = [] - for i in range(len(task_outputs)): - task_encoding = task_encodings[i] - task_output = task_outputs[i] - documents.append(task_encoding.document) - - for layer_name, annotation in taskmodule.create_annotations_from_output( - task_encoding=task_encoding, task_output=task_output - ): - task_encoding.document[layer_name].predictions.append(annotation) - layer_names.add(layer_name) - - assert layer_names == layer_names_expected - - for document in documents: - for layer_name in layer_names: - layer = { - str(ann) - for ann in document[layer_name].predictions - if ann.label in taskmodule.labels_per_layer[layer_name] - } - layer_expected = { - str(ann) - for ann in document[layer_name] - if ann.label in taskmodule.labels_per_layer[layer_name] - } - assert layer == layer_expected - - # this needs to be outside the above loop because documents can contain duplicates - # which would break the comparison when clearing predictions too early - for document in documents: - for layer_name in layer_names: - document[layer_name].predictions.clear() - - -def test_annotations_from_output(task_encodings, task_outputs, taskmodule): - _test_annotations_from_output( - taskmodule=taskmodule, - task_encodings=task_encodings, - task_outputs=task_outputs, - layer_names_expected={"entities", "relations"}, - ) - - -def get_default_taskmodule(**kwargs): - taskmodule = PointerNetworkTaskModuleForEnd2EndRE( - tokenizer_name_or_path="facebook/bart-base", - labels_per_layer={ - "labeled_spans": ["content", "person", "topic"], - "binary_relations": ["is_about"], - }, - **kwargs, - ) - taskmodule.post_prepare() - return taskmodule - - -def 
test_configure_model_metric(): - taskmodule = get_default_taskmodule() - metric = taskmodule.configure_model_metric() - assert metric is not None - values = metric.compute() - assert values == { - "binary_relations": {}, - "decoding_errors": {"all": 0.0}, - "exact_encoding_matches": 0.0, - "labeled_spans": {}, - } - - model_output = {"labels": torch.tensor([[14, 14, 5, 11, 12, 3, 6, 17, 17, 4, 2, 2, 2, 2, 1]])} - # test with expected == prediction - metric.update(model_output, model_output) - values = metric.compute() - assert values == { - "exact_encoding_matches": 1.0, - "decoding_errors": {"correct": 1.0, "all": 0.0}, - "labeled_spans": { - "content": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "person": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "topic": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "macro": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "micro": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - }, - "binary_relations": { - "is_about": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "macro": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - "micro": {"recall": 1.0, "precision": 1.0, "f1": 1.0}, - }, - } - torch.random.manual_seed(42) - # random_labels = torch.randint(0, 20, (1, 30)) - # split into random_labels1 and random_labels2 just for better code formatting - random_labels1 = [0, 14, 4, 19, 2, 6, 18, 3, 0, 8, 8, 14, 2, 1] - random_labels2 = [14, 6, 7, 8, 4, 1, 17, 9, 14, 7, 13, 15, 5, 12, 18, 13] - labels_random = torch.tensor([random_labels1 + random_labels2]) - metric.reset() - # test the case where we have mixed results (correct and wrong) - metric.update(model_output, model_output) - metric.update(prediction={"labels": labels_random}, expected=model_output) - values = metric.compute() - assert values == { - "exact_encoding_matches": 0.5, - "decoding_errors": {"correct": 0.5, "len": 0.25, "order": 0.25, "all": 0.5}, - "labeled_spans": { - "person": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - "topic": 
{"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - "content": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - "macro": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - "micro": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - }, - "binary_relations": { - "is_about": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - "macro": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - "micro": {"recall": 0.5, "precision": 1.0, "f1": 0.6666666865348816}, - }, - } - - # ensure that the metric can be pickled - pickle.dumps(metric) - - -def test_configure_model_generation(): - taskmodule = get_default_taskmodule() - assert taskmodule.configure_model_generation() == { - "no_repeat_ngram_size": 7, - } - - -def test_configure_model_generation_with_constrained_generation(): - taskmodule = get_default_taskmodule(constrained_generation=True) - generation_config = taskmodule.configure_model_generation() - assert set(generation_config) == {"no_repeat_ngram_size", "logits_processor"} - assert generation_config["no_repeat_ngram_size"] == 7 - logits_processor = generation_config["logits_processor"] - assert isinstance(logits_processor, LogitsProcessorList) - assert len(logits_processor) == 2 - assert isinstance(logits_processor[0], FinitizeLogitsProcessor) - assert isinstance(logits_processor[1], PrefixConstrainedLogitsProcessorWithMaximum) - - -def test_prefix_allowed_tokens_fn_with_maximum(): - taskmodule = get_default_taskmodule() - # not that this includes the leading bos token - add_previous_input_ids = torch.tensor([0, 14, 14, 5, 11, 12, 3, 6, 17, 17, 4, 2, 2, 2, 2, 1]) - - # empty input (first entry) - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:1], maximum=20 - ) - # allow the eos id [1] and all offset ids [7..19] - assert allowed_ids == [1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - - # first span start - allowed_ids = 
taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:2], maximum=20 - ) - # allow all offset ids from first span start [14..19] - assert allowed_ids == [14, 15, 16, 17, 18, 19] - - # first span start and end - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:3], maximum=20 - ) - # allow all span ids - assert allowed_ids == [3, 4, 5] - - # first span start, end, and label - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:4], maximum=20 - ) - # allow none [2] and all offsets except offsets covered by first span [14] - assert allowed_ids == [2, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19] - - # first span, and second span start - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:5], maximum=20 - ) - # allow all offsets from second span start [11], but before first span start [14] because it would be an overlap - assert allowed_ids == [11, 12, 13] - - # first span, and second span start and end - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:6], maximum=20 - ) - # allow all span ids - assert allowed_ids == [3, 4, 5] - - # first span, and second span - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:7], maximum=20 - ) - # allow all relation ids - assert allowed_ids == [6] - - # entry begins (second entry) - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:8], maximum=20 - ) - # allow eos [1] and all offsets [7..19] - assert allowed_ids == [1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - - # first span start - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:9], maximum=20 - ) 
- # allow all offsets from first span start [17..19] - assert allowed_ids == [17, 18, 19] - - # first span start and end - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:10], maximum=20 - ) - # allow all span ids - assert allowed_ids == [3, 4, 5] - - # first span start, end, and span label - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:11], maximum=20 - ) - # allow none [2] and all offsets except offsets covered by first span [17] - assert allowed_ids == [2, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19] - - # first span, and none - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:12], maximum=20 - ) - # allow only none [2] because when the entry contains already a none id, it cannot be followed by anything else - assert allowed_ids == [2] - - # first span, and none, and none - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:13], maximum=20 - ) - # allow only none [2] because when the entry contains already a none id, it cannot be followed by anything else - assert allowed_ids == [2] - - # first span, and none, and none, and none - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:14], maximum=20 - ) - # allow only none [2] because when the entry contains already a none id, it cannot be followed by anything else - assert allowed_ids == [2] - - # first span, and none, and none, and none, and none (second entry is complete) - allowed_ids = taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:15], maximum=20 - ) - # allow eos [1] and all offsets [7..19] - assert allowed_ids == [1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - - # got an eos, so the sequence is complete - allowed_ids = 
taskmodule._prefix_allowed_tokens_fn_with_maximum( - batch_id=0, input_ids=add_previous_input_ids[:16], maximum=20 - ) - # allow only pad [1] (same as eos) because the sequence is complete - assert allowed_ids == [1] diff --git a/tests/taskmodules/test_re_span_pair_classification.py b/tests/taskmodules/test_re_span_pair_classification.py deleted file mode 100644 index f82554c1e..000000000 --- a/tests/taskmodules/test_re_span_pair_classification.py +++ /dev/null @@ -1,614 +0,0 @@ -import dataclasses -import logging -from typing import Any, Dict, Union - -import pytest -import torch -from pie_core import AnnotationLayer, annotation_field -from pie_core.utils.dictionary import flatten_dict_s -from torch import tensor -from torchmetrics import Metric, MetricCollection - -from pie_modules.annotations import BinaryRelation, LabeledSpan -from pie_modules.documents import TextBasedDocument -from pie_modules.taskmodules import RESpanPairClassificationTaskModule -from pie_modules.utils.span import distance -from tests import _config_to_str - -TOKENIZER_NAME_OR_PATH = "bert-base-cased" - -CONFIGS = [{}, {"partition_annotation": "sentences"}] -CONFIGS_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", params=CONFIGS_DICT.keys()) -def cfg(request): - return CONFIGS_DICT[request.param] - - -@pytest.fixture(scope="module") -def unprepared_taskmodule(cfg): - taskmodule = RESpanPairClassificationTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=TOKENIZER_NAME_OR_PATH, - log_first_n_examples=10, - collect_statistics=True, - **cfg, - ) - assert not taskmodule.is_from_pretrained - - return taskmodule - - -@dataclasses.dataclass -class FixedTestDocument(TextBasedDocument): - sentences: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - - 
-@pytest.fixture(scope="module") -def fixed_documents(documents): - result = [] - for document in documents: - fixed_doc = document.copy(with_annotations=False).as_type(FixedTestDocument) - for sentence in document.sentences: - fixed_doc.sentences.append( - LabeledSpan(start=sentence.start, end=sentence.end, label="sentence") - ) - entity_mapping = {} - for entity in document.entities: - new_entity = entity.copy() - fixed_doc.entities.append(new_entity) - entity_mapping[entity] = new_entity - for relation in document.relations: - new_relation = relation.copy( - head=entity_mapping[relation.head], tail=entity_mapping[relation.tail] - ) - fixed_doc.relations.append(new_relation) - result.append(fixed_doc) - return result - - -@pytest.fixture(scope="module") -def taskmodule(unprepared_taskmodule, fixed_documents) -> RESpanPairClassificationTaskModule: - unprepared_taskmodule.prepare(fixed_documents) - return unprepared_taskmodule - - -def test_taskmodule(taskmodule: RESpanPairClassificationTaskModule): - assert taskmodule.is_prepared - - assert taskmodule.relation_annotation == "relations" - assert taskmodule.labels == ["org:founded_by", "per:employee_of", "per:founder"] - assert taskmodule.entity_labels == ["ORG", "PER"] - assert taskmodule.label_to_id == { - "org:founded_by": 1, - "per:employee_of": 2, - "per:founder": 3, - "no_relation": 0, - } - assert taskmodule.argument_markers == [ - "[/SPAN:ORG]", - "[/SPAN:PER]", - "[SPAN:ORG]", - "[SPAN:PER]", - ] - assert taskmodule.tokenizer.additional_special_tokens == [ - "[SPAN:PER]", - "[/SPAN:ORG]", - "[/SPAN:PER]", - "[SPAN:ORG]", - ] - assert taskmodule.tokenizer.additional_special_tokens_ids == [28996, 28997, 28998, 28999] - - # because this is not the standard value for relation_annotation, we can not determine the document type - assert taskmodule.document_type is None - - -@pytest.fixture(scope="module") -def document(fixed_documents): - result = fixed_documents[4] - assert ( - result.metadata["description"] - 
== "sentences with multiple relation annotations and cross-sentence relation" - ) - return result - - -def test_create_candidate_relations(taskmodule, document): - # _create_candidate_relations requires normalized documents - normalized_document = taskmodule.normalize_document(document) - candidate_relations = taskmodule._create_candidate_relations(normalized_document) - resolved_relations = [ann.resolve() for ann in candidate_relations] - assert resolved_relations == [ - ("no_relation", (("PER", "Entity G"), ("ORG", "H"))), - ("no_relation", (("PER", "Entity G"), ("ORG", "I"))), - ("no_relation", (("ORG", "H"), ("PER", "Entity G"))), - ("no_relation", (("ORG", "H"), ("ORG", "I"))), - ("no_relation", (("ORG", "I"), ("PER", "Entity G"))), - ("no_relation", (("ORG", "I"), ("ORG", "H"))), - ] - - -def test_create_candidate_relations_with_max_distance(taskmodule, document): - # _create_candidate_relations requires normalized documents - normalized_document = taskmodule.normalize_document(document) - candidate_relations = taskmodule._create_candidate_relations( - normalized_document, max_argument_distance=10 - ) - resolved_relations = [ann.resolve() for ann in candidate_relations] - assert resolved_relations == [ - ("no_relation", (("PER", "Entity G"), ("ORG", "H"))), - ("no_relation", (("ORG", "H"), ("PER", "Entity G"))), - ] - distances = [ - distance( - start_end=(rel.head.start, rel.head.end), - other_start_end=(rel.tail.start, rel.tail.end), - distance_type="inner", - ) - for rel in candidate_relations - ] - assert distances == [10.0, 10.0] - - -@pytest.fixture(scope="module") -def task_encodings(taskmodule, document): - result = taskmodule.encode(document, encode_target=True) - return result - - -def test_encode_input(task_encodings, document, taskmodule, cfg): - assert task_encodings is not None - if cfg == {}: - assert len(task_encodings) == 1 - inputs = task_encodings[0].inputs - assert set(inputs) == { - "input_ids", - "attention_mask", - "span_start_indices", 
- "span_end_indices", - "tuple_indices", - "tuple_indices_mask", - } - tokens = taskmodule.tokenizer.convert_ids_to_tokens(inputs["input_ids"]) - assert tokens == [ - "[CLS]", - "First", - "sentence", - ".", - "[SPAN:PER]", - "En", - "##ti", - "##ty", - "G", - "[/SPAN:PER]", - "works", - "at", - "[SPAN:ORG]", - "H", - "[/SPAN:ORG]", - ".", - "And", - "founded", - "[SPAN:ORG]", - "I", - "[/SPAN:ORG]", - ".", - "[SEP]", - ] - span_tokens = [ - tokens[start:end] - for start, end in zip(inputs["span_start_indices"], inputs["span_end_indices"]) - ] - assert span_tokens == [ - ["[SPAN:PER]", "En", "##ti", "##ty", "G", "[/SPAN:PER]"], - ["[SPAN:ORG]", "H", "[/SPAN:ORG]"], - ["[SPAN:ORG]", "I", "[/SPAN:ORG]"], - ] - tuple_spans = [ - [span_tokens[idx] for idx in indices] for indices in inputs["tuple_indices"] - ] - assert tuple_spans == [ - [ - ["[SPAN:PER]", "En", "##ti", "##ty", "G", "[/SPAN:PER]"], - ["[SPAN:ORG]", "H", "[/SPAN:ORG]"], - ], - [ - ["[SPAN:PER]", "En", "##ti", "##ty", "G", "[/SPAN:PER]"], - ["[SPAN:ORG]", "I", "[/SPAN:ORG]"], - ], - [["[SPAN:ORG]", "I", "[/SPAN:ORG]"], ["[SPAN:ORG]", "H", "[/SPAN:ORG]"]], - ] - assert inputs["tuple_indices_mask"].tolist() == [True, True, True] - elif cfg == {"partition_annotation": "sentences"}: - assert len(task_encodings) == 1 - for idx, encoding in enumerate(task_encodings): - inputs = encoding.inputs - assert set(inputs) == { - "input_ids", - "attention_mask", - "span_start_indices", - "span_end_indices", - "tuple_indices", - "tuple_indices_mask", - } - tokens = taskmodule.tokenizer.convert_ids_to_tokens(inputs["input_ids"]) - span_tokens = [ - tokens[start:end] - for start, end in zip(inputs["span_start_indices"], inputs["span_end_indices"]) - ] - tuple_spans = [ - [span_tokens[idx] for idx in indices] for indices in inputs["tuple_indices"] - ] - if idx == 0: - assert tokens == [ - "[CLS]", - "En", - "##ti", - "##ty", - "G", - "[/SPAN:PER]", - "works", - "at", - "[SPAN:ORG]", - "H", - "[/SPAN:ORG]", - ".", - "[SEP]", 
- ] - assert span_tokens == [ - ["[CLS]", "En", "##ti", "##ty", "G", "[/SPAN:PER]"], - ["[SPAN:ORG]", "H", "[/SPAN:ORG]"], - ] - assert tuple_spans == [ - [ - ["[CLS]", "En", "##ti", "##ty", "G", "[/SPAN:PER]"], - ["[SPAN:ORG]", "H", "[/SPAN:ORG]"], - ] - ] - assert inputs["tuple_indices_mask"].tolist() == [True] - else: - raise ValueError(f"unexpected idx: {idx}") - else: - raise ValueError(f"unexpected config: {cfg}") - - -def test_encode_target(taskmodule, task_encodings, cfg): - if cfg == {}: - assert len(task_encodings) == 1 - targets = task_encodings[0].targets - labels = [taskmodule.id_to_label[label] for label in targets["labels"].tolist()] - assert labels == ["per:employee_of", "per:founder", "org:founded_by"] - elif cfg == {"partition_annotation": "sentences"}: - assert len(task_encodings) == 1 - for idx, encoding in enumerate(task_encodings): - targets = encoding.targets - labels = [taskmodule.id_to_label[label] for label in targets["labels"].tolist()] - if idx == 0: - assert labels == ["per:employee_of"] - else: - raise ValueError(f"unexpected idx: {idx}") - else: - raise ValueError(f"unexpected config: {cfg}") - - -def test_encode_with_no_gold_relation(document): - # create a new taskmodule that does create candidate relations - taskmodule = RESpanPairClassificationTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=TOKENIZER_NAME_OR_PATH, - create_candidate_relations=True, - labels=["org:founded_by", "per:employee_of", "per:founder"], - entity_labels=["ORG", "PER"], - ) - taskmodule.post_prepare() - # create a new document that has no relations - document = document.copy() - document.relations.clear() - - encodings = taskmodule.encode(document, encode_target=True) - - assert len(encodings) == 1 - encoding = encodings[0] - # same number of candidate relations as there are labels - assert len(encoding.metadata["candidate_relations"]) == encoding.targets["labels"].numel() - assert all(rel.label == "no_relation" for rel in 
encoding.metadata["candidate_relations"]) - assert encoding.targets["labels"].tolist() == [0, 0, 0, 0, 0, 0] - - -def test_encode_with_multiple_gold_relations_with_same_arguments(document, caplog): - # create a new taskmodule that does create candidate relations - taskmodule = RESpanPairClassificationTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=TOKENIZER_NAME_OR_PATH, - labels=["org:founded_by", "per:employee_of", "per:founder"], - entity_labels=["ORG", "PER"], - ) - taskmodule.post_prepare() - # create a new document that has multiple relations with the same arguments - document = document.copy() - document.relations.clear() - head = document.entities[0] - tail = document.entities[1] - document.relations.extend( - [ - BinaryRelation(head=head, tail=tail, label="org:founded_by"), - BinaryRelation(head=head, tail=tail, label="per:employee_of"), - ] - ) - - caplog.clear() - with caplog.at_level(logging.WARNING): - encodings = taskmodule.encode(document, encode_target=True) - assert len(caplog.messages) == 2 - assert ( - caplog.messages[0] - == "skip the candidate relation because there are more than one gold relation for " - "its args and roles: [BinaryRelation(head=LabeledSpan(start=5, end=10, label='PER', score=1.0), " - "tail=LabeledSpan(start=13, end=15, label='ORG', score=1.0), label='org:founded_by', score=1.0), " - "BinaryRelation(head=LabeledSpan(start=5, end=10, label='PER', score=1.0), " - "tail=LabeledSpan(start=13, end=15, label='ORG', score=1.0), label='per:employee_of', score=1.0)]" - ) - assert ( - caplog.messages[1] - == "skip the candidate relation because there are more than one gold relation for " - "its args and roles: [BinaryRelation(head=LabeledSpan(start=5, end=10, label='PER', score=1.0), " - "tail=LabeledSpan(start=13, end=15, label='ORG', score=1.0), label='org:founded_by', score=1.0), " - "BinaryRelation(head=LabeledSpan(start=5, end=10, label='PER', score=1.0), " - "tail=LabeledSpan(start=13, end=15, label='ORG', 
score=1.0), label='per:employee_of', score=1.0)]" - ) - - assert len(encodings) == 1 - encoding = encodings[0] - candidate_relations = encoding.metadata["candidate_relations"] - # same number of candidate relations as there are labels - assert len(candidate_relations) == encoding.targets["labels"].numel() - assert candidate_relations[0].label == "org:founded_by" - assert candidate_relations[1].label == "per:employee_of" - assert encoding.targets["labels"].tolist() == [-100, -100] - - -def test_maybe_log_example(taskmodule, task_encodings, caplog, cfg): - caplog.clear() - if cfg == {}: - with caplog.at_level(logging.INFO): - taskmodule._maybe_log_example(task_encodings[0], target=task_encodings[0].targets) - assert caplog.messages == [ - "*** Example ***", - "doc id: train_doc5", - "tokens: [CLS] First sentence . [SPAN:PER] En ##ti ##ty G [/SPAN:PER] works at [SPAN:ORG] H [/SPAN:ORG] . And founded [SPAN:ORG] I [/SPAN:ORG] . [SEP]", - "input_ids: 101 1752 5650 119 28996 13832 3121 2340 144 28998 1759 1120 28999 145 28997 119 1262 1771 28999 146 28997 119 102", - "relation 0: per:employee_of", - "\targ 0: [SPAN:PER] En ##ti ##ty G [/SPAN:PER]", - "\targ 1: [SPAN:ORG] H [/SPAN:ORG]", - "relation 1: per:founder", - "\targ 0: [SPAN:PER] En ##ti ##ty G [/SPAN:PER]", - "\targ 1: [SPAN:ORG] I [/SPAN:ORG]", - "relation 2: org:founded_by", - "\targ 0: [SPAN:ORG] I [/SPAN:ORG]", - "\targ 1: [SPAN:ORG] H [/SPAN:ORG]", - ] - elif cfg == {"partition_annotation": "sentences"}: - with caplog.at_level(logging.INFO): - taskmodule._maybe_log_example(task_encodings[0], target=task_encodings[0].targets) - assert caplog.messages == [ - "*** Example ***", - "doc id: train_doc5", - "tokens: [CLS] En ##ti ##ty G [/SPAN:PER] works at [SPAN:ORG] H [/SPAN:ORG] . 
[SEP]", - "input_ids: 101 13832 3121 2340 144 28998 1759 1120 28999 145 28997 119 102", - "relation 0: per:employee_of", - "\targ 0: [CLS] En ##ti ##ty G [/SPAN:PER]", - "\targ 1: [SPAN:ORG] H [/SPAN:ORG]", - ] - else: - raise ValueError(f"unexpected config: {cfg}") - - -def test_encode_with_statistics(taskmodule, fixed_documents, cfg, caplog): - caplog.clear() - with caplog.at_level(logging.INFO): - taskmodule.encode(fixed_documents, encode_target=True) - assert len(caplog.messages) > 0 - statistics = caplog.messages[-1] - if cfg == {}: - assert ( - statistics - == """statistics: -| | org:founded_by | per:employee_of | per:founder | -|:--------------------|-----------------:|------------------:|--------------:| -| available | 2 | 3 | 2 | -| available_tokenized | 2 | 3 | 2 | -| used | 2 | 3 | 2 |""" - ) - elif cfg == {"partition_annotation": "sentences"}: - assert ( - statistics - == """statistics: -| | org:founded_by | per:employee_of | per:founder | -|:--------------------|-----------------:|------------------:|--------------:| -| available | 2 | 3 | 2 | -| available_tokenized | 1 | 3 | 1 | -| used | 1 | 3 | 1 |""" - ) - else: - raise ValueError(f"unexpected config: {cfg}") - - -def test_collate(taskmodule, task_encodings, cfg): - result = taskmodule.collate(task_encodings) - assert result is not None - inputs, targets = result - assert set(inputs) == { - "input_ids", - "attention_mask", - "span_start_indices", - "span_end_indices", - "tuple_indices", - "tuple_indices_mask", - } - if cfg == {}: - torch.testing.assert_close( - inputs["input_ids"], - tensor( - [ - [ - 101, - 1752, - 5650, - 119, - 28996, - 13832, - 3121, - 2340, - 144, - 28998, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 28999, - 146, - 28997, - 119, - 102, - ] - ] - ), - ) - torch.testing.assert_close(inputs["attention_mask"], torch.ones_like(inputs["input_ids"])) - torch.testing.assert_close(inputs["span_start_indices"], tensor([[4, 12, 18]])) - 
torch.testing.assert_close(inputs["span_end_indices"], tensor([[10, 15, 21]])) - torch.testing.assert_close(inputs["tuple_indices"], tensor([[[0, 1], [0, 2], [2, 1]]])) - torch.testing.assert_close(inputs["tuple_indices_mask"], tensor([[True, True, True]])) - assert set(targets) == {"labels"} - torch.testing.assert_close(targets["labels"], tensor([[2, 3, 1]])) - elif cfg == {"partition_annotation": "sentences"}: - torch.testing.assert_close( - inputs["input_ids"], - tensor( - [[101, 13832, 3121, 2340, 144, 28998, 1759, 1120, 28999, 145, 28997, 119, 102]] - ), - ) - torch.testing.assert_close( - inputs["attention_mask"], tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) - ) - torch.testing.assert_close(inputs["span_start_indices"], tensor([[0, 8]])) - torch.testing.assert_close(inputs["span_end_indices"], tensor([[6, 11]])) - torch.testing.assert_close(inputs["tuple_indices"], tensor([[[0, 1]]])) - torch.testing.assert_close(inputs["tuple_indices_mask"], tensor([[True]])) - assert set(targets) == {"labels"} - torch.testing.assert_close(targets["labels"], tensor([[2]])) - else: - raise ValueError(f"unexpected config: {cfg}") - - -@pytest.fixture -def model_output(): - return { - "labels": torch.tensor([[2, 3, 1]]), - "probabilities": torch.tensor( - [ - [ - # no_relation, org:founded_by, per:employee_of, per:founder - [0.1, 0.2, 0.6, 0.1], - [0.1, 0.2, 0.2, 0.5], - [0.2, 0.5, 0.2, 0.1], - ] - ] - ), - } - - -@pytest.fixture -def unbatched_model_outputs(taskmodule, model_output): - return taskmodule.unbatch_output(model_output) - - -def test_unbatch_outputs(taskmodule, unbatched_model_outputs): - assert len(unbatched_model_outputs) == 1 - result = unbatched_model_outputs[0] - assert set(result) == {"labels", "probabilities"} - assert result["labels"] == ["per:employee_of", "per:founder", "org:founded_by"] - assert result["probabilities"] == [0.6000000238418579, 0.5, 0.5] - - -def test_create_annotations_from_output( - taskmodule, unbatched_model_outputs, 
task_encodings, document -): - result = list( - taskmodule.create_annotations_from_output( - task_encoding=task_encodings[0], task_output=unbatched_model_outputs[0] - ) - ) - scores = [0.6000000238418579, 0.5, 0.5] - for i, ((layer_name, predicted_relation), original_relation) in enumerate( - zip(result, document.relations) - ): - assert layer_name == taskmodule.relation_annotation - assert predicted_relation == original_relation.copy() - assert predicted_relation.score == scores[i] - - -def get_metric_state(metric_or_collection: Union[Metric, MetricCollection]) -> Dict[str, Any]: - if isinstance(metric_or_collection, Metric): - return { - k: v.tolist() for k, v in flatten_dict_s(metric_or_collection.metric_state).items() - } - elif isinstance(metric_or_collection, MetricCollection): - return flatten_dict_s({k: get_metric_state(v) for k, v in metric_or_collection.items()}) - else: - raise ValueError(f"unsupported type: {type(metric_or_collection)}") - - -def test_configure_model_metrics(taskmodule, model_output): - metrics = taskmodule.configure_model_metric(stage="train") - assert metrics is not None - assert isinstance(metrics, (Metric, MetricCollection)) - state = get_metric_state(metrics) - assert state == { - "f1_per_label/tp": [0, 0, 0, 0], - "f1_per_label/fp": [0, 0, 0, 0], - "f1_per_label/tn": [0, 0, 0, 0], - "f1_per_label/fn": [0, 0, 0, 0], - "macro/f1/tp": [0, 0, 0, 0], - "macro/f1/fp": [0, 0, 0, 0], - "macro/f1/tn": [0, 0, 0, 0], - "macro/f1/fn": [0, 0, 0, 0], - "micro/f1/tp": [0], - "micro/f1/fp": [0], - "micro/f1/tn": [0], - "micro/f1/fn": [0], - } - - metric_values = metrics(model_output, model_output) - state = get_metric_state(metrics) - assert state == { - "f1_per_label/tp": [0, 1, 1, 1], - "f1_per_label/fp": [0, 0, 0, 0], - "f1_per_label/tn": [3, 2, 2, 2], - "f1_per_label/fn": [0, 0, 0, 0], - "macro/f1/tp": [0, 1, 1, 1], - "macro/f1/fp": [0, 0, 0, 0], - "macro/f1/tn": [3, 2, 2, 2], - "macro/f1/fn": [0, 0, 0, 0], - "micro/f1/tp": [3], - 
"micro/f1/fp": [0], - "micro/f1/tn": [9], - "micro/f1/fn": [0], - } - - metric_values_converted = {key: value.item() for key, value in metric_values.items()} - assert metric_values_converted == { - "macro/f1": 1.0, - "micro/f1": 1.0, - "no_relation/f1": 0.0, - "org:founded_by/f1": 1.0, - "per:employee_of/f1": 1.0, - "per:founder/f1": 1.0, - } diff --git a/tests/taskmodules/test_re_text_classification_with_indices.py b/tests/taskmodules/test_re_text_classification_with_indices.py deleted file mode 100644 index 0574e10e5..000000000 --- a/tests/taskmodules/test_re_text_classification_with_indices.py +++ /dev/null @@ -1,3159 +0,0 @@ -import dataclasses -import logging -import pickle -import re -from dataclasses import dataclass -from typing import Any, Dict, List, Union - -import pytest -import torch -from pie_core import ( - Annotation, - AnnotationLayer, - Document, - TaskEncoding, - annotation_field, -) -from pie_core.utils.dictionary import flatten_dict_s -from torch import tensor -from torchmetrics import Metric, MetricCollection - -from pie_modules.annotations import BinaryRelation, LabeledSpan, NaryRelation -from pie_modules.documents import ( - TextBasedDocument, - TextDocumentWithLabeledSpansAndBinaryRelations, -) -from pie_modules.taskmodules import RETextClassificationWithIndicesTaskModule -from pie_modules.taskmodules.re_text_classification_with_indices import ( - HEAD, - TAIL, - find_sublist, - get_relation_argument_spans_and_roles, - span_distance, -) -from pie_modules.utils.span import distance_inner -from tests import _config_to_str -from tests.conftest import _TABULATE_AVAILABLE, TestDocument - -CONFIGS = [ - {"add_type_to_marker": False, "append_markers": False}, - {"add_type_to_marker": True, "append_markers": False}, - {"add_type_to_marker": False, "append_markers": True}, - {"add_type_to_marker": True, "append_markers": True}, -] -CONFIGS_DICT = {_config_to_str(cfg): cfg for cfg in CONFIGS} - - -@pytest.fixture(scope="module", 
params=CONFIGS_DICT.keys()) -def cfg(request): - return CONFIGS_DICT[request.param] - - -def test_taskmodule_with_deprecated_parameters(caplog): - with caplog.at_level(logging.WARNING): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - tokenizer_name_or_path=tokenizer_name_or_path, label_to_id={"a": 0, "b": 1} - ) - assert taskmodule.labels == ["a", "b"] - # check the warning message - assert len(caplog.records) == 1 - assert ( - caplog.records[0].message - == "The parameter label_to_id is deprecated and will be removed in a future version. Please use labels instead." - ) - - -@pytest.fixture(scope="module") -def unprepared_taskmodule(cfg): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", tokenizer_name_or_path=tokenizer_name_or_path, **cfg - ) - assert not taskmodule.is_from_pretrained - - return taskmodule - - -@pytest.fixture(scope="module") -def taskmodule(unprepared_taskmodule, documents): - unprepared_taskmodule.prepare(documents) - return unprepared_taskmodule - - -@pytest.fixture -def model_output(): - return { - "labels": torch.tensor([1, 0, 2, 3, 1, 0, 0, 0]), - "probabilities": torch.tensor( - [ - # O, org:founded_by, per:employee_of, per:founder - [0.1, 0.6, 0.1, 0.2], - [0.5, 0.2, 0.2, 0.1], - [0.1, 0.2, 0.6, 0.1], - [0.1, 0.2, 0.2, 0.5], - [0.2, 0.4, 0.3, 0.1], - [0.5, 0.2, 0.2, 0.1], - [0.6, 0.1, 0.2, 0.1], - [0.5, 0.2, 0.2, 0.1], - ] - ), - } - - -def test_prepared_taskmodule(taskmodule, documents): - assert taskmodule.is_prepared - - assert taskmodule.entity_labels == ["ORG", "PER"] - - if taskmodule.append_markers: - if taskmodule.add_type_to_marker: - assert taskmodule.argument_markers == [ - "[/H:ORG]", - "[/H:PER]", - "[/H]", - "[/T:ORG]", - "[/T:PER]", - "[/T]", - "[H:ORG]", - "[H:PER]", - "[H=ORG]", - "[H=PER]", - "[H]", - "[T:ORG]", - "[T:PER]", - "[T=ORG]", - "[T=PER]", - "[T]", - ] - assert 
taskmodule.argument_markers_to_id == { - "[/H:ORG]": 28996, - "[/H:PER]": 28997, - "[/H]": 28998, - "[/T:ORG]": 28999, - "[/T:PER]": 29000, - "[/T]": 29001, - "[H:ORG]": 29002, - "[H:PER]": 29003, - "[H=ORG]": 29004, - "[H=PER]": 29005, - "[H]": 29006, - "[T:ORG]": 29007, - "[T:PER]": 29008, - "[T=ORG]": 29009, - "[T=PER]": 29010, - "[T]": 29011, - } - - else: - assert taskmodule.argument_markers == [ - "[/H]", - "[/T]", - "[H=ORG]", - "[H=PER]", - "[H]", - "[T=ORG]", - "[T=PER]", - "[T]", - ] - assert taskmodule.argument_markers_to_id == { - "[/H]": 28996, - "[/T]": 28997, - "[H=ORG]": 28998, - "[H=PER]": 28999, - "[H]": 29000, - "[T=ORG]": 29001, - "[T=PER]": 29002, - "[T]": 29003, - } - else: - if taskmodule.add_type_to_marker: - assert taskmodule.argument_markers == [ - "[/H:ORG]", - "[/H:PER]", - "[/H]", - "[/T:ORG]", - "[/T:PER]", - "[/T]", - "[H:ORG]", - "[H:PER]", - "[H]", - "[T:ORG]", - "[T:PER]", - "[T]", - ] - assert taskmodule.argument_markers_to_id == { - "[/H:ORG]": 28996, - "[/H:PER]": 28997, - "[/H]": 28998, - "[/T:ORG]": 28999, - "[/T:PER]": 29000, - "[/T]": 29001, - "[H:ORG]": 29002, - "[H:PER]": 29003, - "[H]": 29004, - "[T:ORG]": 29005, - "[T:PER]": 29006, - "[T]": 29007, - } - else: - assert taskmodule.argument_markers == ["[/H]", "[/T]", "[H]", "[T]"] - assert taskmodule.argument_markers_to_id == { - "[/H]": 28996, - "[/T]": 28997, - "[H]": 28998, - "[T]": 28999, - } - - assert taskmodule.label_to_id == { - "org:founded_by": 1, - "per:employee_of": 2, - "per:founder": 3, - "no_relation": 0, - } - assert taskmodule.id_to_label == { - 1: "org:founded_by", - 2: "per:employee_of", - 3: "per:founder", - 0: "no_relation", - } - - -def test_config(taskmodule): - config = taskmodule._config() - assert config["taskmodule_type"] == "RETextClassificationWithIndicesTaskModule" - assert taskmodule.PREPARED_ATTRIBUTES == ["labels", "entity_labels"] - assert all(attribute in config for attribute in taskmodule.PREPARED_ATTRIBUTES) - assert config["labels"] == 
["org:founded_by", "per:employee_of", "per:founder"] - assert config["entity_labels"] == ["ORG", "PER"] - - -@pytest.mark.parametrize("encode_target", [False, True]) -def test_encode(taskmodule, documents, encode_target): - task_encodings = taskmodule.encode(documents, encode_target=encode_target) - - assert len(task_encodings) == 7 - - encoding = task_encodings[0] - - tokens = taskmodule.tokenizer.convert_ids_to_tokens(encoding.inputs["input_ids"]) - assert len(tokens) == len(encoding.inputs["input_ids"]) - - if taskmodule.add_type_to_marker: - assert tokens[:14] == [ - "[CLS]", - "[H:PER]", - "En", - "##ti", - "##ty", - "A", - "[/H:PER]", - "works", - "at", - "[T:ORG]", - "B", - "[/T:ORG]", - ".", - "[SEP]", - ] - else: - assert tokens[:14] == [ - "[CLS]", - "[H]", - "En", - "##ti", - "##ty", - "A", - "[/H]", - "works", - "at", - "[T]", - "B", - "[/T]", - ".", - "[SEP]", - ] - if taskmodule.append_markers: - assert len(tokens) == 14 + 4 - assert tokens[-4:] == ["[H=PER]", "[SEP]", "[T=ORG]", "[SEP]"] - else: - assert len(tokens) == 14 - - if encode_target: - assert encoding.targets == [2] - else: - assert not encoding.has_targets - - with pytest.raises(ValueError, match=re.escape("task encoding has no target")): - encoding.targets - - -@pytest.fixture(scope="module") -def batch(taskmodule, documents): - documents = [documents[i] for i in [0, 1, 4]] - task_encodings = taskmodule.encode(documents, encode_target=True) - return taskmodule.collate(task_encodings[:2]) - - -def test_collate(taskmodule, batch): - inputs, targets = batch - - assert "input_ids" in inputs - assert "attention_mask" in inputs - assert inputs["input_ids"].shape == inputs["attention_mask"].shape - - if taskmodule.append_markers: - assert inputs["input_ids"].shape == (2, 25) - if taskmodule.add_type_to_marker: - torch.testing.assert_close( - inputs.input_ids, - torch.tensor( - [ - [ - 101, - 29003, - 13832, - 3121, - 2340, - 138, - 28997, - 1759, - 1120, - 29007, - 139, - 28999, - 119, - 102, - 
29005, - 102, - 29009, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 29003, - 13832, - 3121, - 2340, - 144, - 28997, - 1759, - 1120, - 29007, - 145, - 28999, - 119, - 1262, - 1771, - 146, - 119, - 102, - 29005, - 102, - 29009, - 102, - ], - ] - ), - ) - else: - torch.testing.assert_close( - inputs.input_ids, - torch.tensor( - [ - [ - 101, - 29000, - 13832, - 3121, - 2340, - 138, - 28996, - 1759, - 1120, - 29003, - 139, - 28997, - 119, - 102, - 28999, - 102, - 29001, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 29000, - 13832, - 3121, - 2340, - 144, - 28996, - 1759, - 1120, - 29003, - 145, - 28997, - 119, - 1262, - 1771, - 146, - 119, - 102, - 28999, - 102, - 29001, - 102, - ], - ] - ), - ) - torch.testing.assert_close( - inputs.attention_mask, - torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ), - ) - - else: - assert inputs["input_ids"].shape == (2, 21) - - if taskmodule.add_type_to_marker: - torch.testing.assert_close( - inputs.input_ids, - torch.tensor( - [ - [ - 101, - 29003, - 13832, - 3121, - 2340, - 138, - 28997, - 1759, - 1120, - 29005, - 139, - 28999, - 119, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 29003, - 13832, - 3121, - 2340, - 144, - 28997, - 1759, - 1120, - 29005, - 145, - 28999, - 119, - 1262, - 1771, - 146, - 119, - 102, - ], - ] - ), - ) - else: - torch.testing.assert_close( - inputs.input_ids, - torch.tensor( - [ - [ - 101, - 28998, - 13832, - 3121, - 2340, - 138, - 28996, - 1759, - 1120, - 28999, - 139, - 28997, - 119, - 102, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - [ - 101, - 1752, - 5650, - 119, - 28998, - 13832, - 3121, - 2340, - 144, - 28996, - 1759, - 1120, - 28999, - 145, - 28997, - 119, - 1262, - 1771, - 146, - 119, - 102, - ], - ] - ), - ) - torch.testing.assert_close( - 
inputs.attention_mask, - torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ), - ) - - assert set(targets) == {"labels"} - torch.testing.assert_close(targets["labels"], torch.tensor([2, 2])) - - -def test_unbatch_output(taskmodule, model_output): - unbatched_outputs = taskmodule.unbatch_output(model_output) - - assert len(unbatched_outputs) == 8 - - labels = [ - "org:founded_by", - "no_relation", - "per:employee_of", - "per:founder", - "org:founded_by", - "no_relation", - "no_relation", - "no_relation", - ] - probabilities = [0.6, 0.5, 0.6, 0.5, 0.4, 0.5, 0.6, 0.5] - - for output, label, probability in zip(unbatched_outputs, labels, probabilities): - assert set(output.keys()) == {"labels", "probabilities"} - assert output["labels"] == [label] - assert output["probabilities"] == pytest.approx([probability]) - - -@pytest.mark.parametrize("inplace", [False, True]) -def test_decode(taskmodule, documents, model_output, inplace): - # copy the documents, because the taskmodule may modify them - documents = [documents[i].copy() for i in [0, 1, 4]] - - encodings = taskmodule.encode(documents, encode_target=False) - unbatched_outputs = taskmodule.unbatch_output(model_output) - decoded_documents = taskmodule.decode( - task_encodings=encodings, - task_outputs=unbatched_outputs, - inplace=inplace, - ) - - assert len(decoded_documents) == len(documents) - - if inplace: - assert {id(doc) for doc in decoded_documents} == {id(doc) for doc in documents} - else: - assert {id(doc) for doc in decoded_documents}.isdisjoint({id(doc) for doc in documents}) - - expected_scores = [0.6, 0.5, 0.6, 0.5, 0.4, 0.5, 0.6, 0.5] - i = 0 - for document in decoded_documents: - for relation_expected, relation_decoded in zip( - document["entities"], document["entities"].predictions - ): - assert relation_expected.start == relation_decoded.start - assert relation_expected.end == relation_decoded.end 
- assert relation_expected.label == relation_decoded.label - assert expected_scores[i] == pytest.approx(relation_decoded.score) - i += 1 - - if not inplace: - for document in documents: - assert not document["relations"].predictions - - -def test_encode_with_partition(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - partition_annotation="sentences", - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - assert len(documents) == 7 - encodings = taskmodule.encode(documents) - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(encoding.inputs["input_ids"]) - for encoding in encodings - ] - assert len(encodings) == 5 - assert encodings[0].document != encodings[1].document - assert encodings[1].document != encodings[2].document - # the last document contains 3 valid relations - assert encodings[2].document == encodings[3].document - assert encodings[3].document == encodings[4].document - assert tokens[0] == [ - "[CLS]", - "[H]", - "En", - "##ti", - "##ty", - "A", - "[/H]", - "works", - "at", - "[T]", - "B", - "[/T]", - ".", - "[SEP]", - ] - assert tokens[1] == [ - "[CLS]", - "[H]", - "En", - "##ti", - "##ty", - "G", - "[/H]", - "works", - "at", - "[T]", - "H", - "[/T]", - ".", - "[SEP]", - ] - assert tokens[2] == [ - "[CLS]", - "[H]", - "En", - "##ti", - "##ty", - "M", - "[/H]", - "works", - "at", - "[T]", - "N", - "[/T]", - ".", - "[SEP]", - ] - assert tokens[3] == [ - "[CLS]", - "And", - "[H]", - "it", - "[/H]", - "founded", - "[T]", - "O", - "[/T]", - "[SEP]", - ] - assert tokens[4] == [ - "[CLS]", - "And", - "[T]", - "it", - "[/T]", - "founded", - "[H]", - "O", - "[/H]", - "[SEP]", - ] - - -def test_encode_with_windowing(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - 
tokenizer_name_or_path=tokenizer_name_or_path, - max_window=12, - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - assert len(documents) == 7 - encodings = taskmodule.encode(documents) - assert len(encodings) == 3 - for encoding in encodings: - assert len(encoding.inputs["input_ids"]) <= taskmodule.max_window - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(encoding.inputs["input_ids"]) - for encoding in encodings - ] - assert tokens[0] == [ - "[CLS]", - "at", - "[T]", - "H", - "[/T]", - ".", - "And", - "founded", - "[H]", - "I", - "[/H]", - "[SEP]", - ] - assert tokens[1] == [ - "[CLS]", - ".", - "And", - "[H]", - "it", - "[/H]", - "founded", - "[T]", - "O", - "[/T]", - ".", - "[SEP]", - ] - assert tokens[2] == [ - "[CLS]", - ".", - "And", - "[T]", - "it", - "[/T]", - "founded", - "[H]", - "O", - "[/H]", - ".", - "[SEP]", - ] - - -def test_encode_with_allow_discontinuous_text(documents): - tokenizer_name_or_path = "bert-base-cased" - # tokenizer_name_or_path = "allenai/longformer-scico" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - max_window=12, - allow_discontinuous_text=True, - ) - - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - assert len(documents) == 7 - encodings = taskmodule.encode(documents) - assert len(encodings) == 3 - - for encoding in encodings: - assert len(encoding.inputs["input_ids"]) <= taskmodule.max_window - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(encoding.inputs["input_ids"]) - for encoding in encodings - ] - assert tokens == [ - ["[CLS]", "at", "[T]", "H", "[/T]", "[SEP]", "founded", "[H]", "I", "[/H]", "[SEP]"], - ["[CLS]", "And", "[H]", "it", "[/H]", "founded", "[T]", "O", "[/T]", "[SEP]"], - ["[CLS]", "And", "[T]", "it", "[/T]", "founded", "[H]", "O", "[/H]", "[SEP]"], - ] - - -def test_encode_with_allow_discontinuous_text_and_binary_relations(): - """This 
checks whether relation arguments at the very beginning or end of the document are - encoded correctly. - - Also, it checks whether the encoding of the consecutive spans that fit within the frame - specified by max_window is correct. - """ - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - tokenizer_name_or_path=tokenizer_name_or_path, - max_window=128, - allow_discontinuous_text=True, - ) - texts = [ - "Loren ipsun dolor sit anet, consectetur adipisci elit, sed eiusnod tenpor incidunt ut labore et dolore nagna aliqua.", - "Ut enin ad ninin venian, quis nostrun exercitationen ullan corporis suscipit laboriosan, nisi ut aliquid ex ea connodi consequatur.", - "Quis aute iure reprehenderit in voluptate velit esse cillun dolore eu fugiat nulla pariatur.", - "Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt nollit anin id est laborun.", - ] - text_lengths = [len(text) for text in texts] - sep = " " - - doc = TextDocumentWithLabeledSpansAndBinaryRelations( - text=sep.join(texts), - id="123", - ) - - labeled_spans = [] - offset = 0 - for i, text in enumerate(texts): - labeled_spans.append( - LabeledSpan(start=0 + offset, end=text_lengths[i] + offset, label="sentence") - ) - offset += text_lengths[i] + len(sep) - - for span in labeled_spans: - doc.labeled_spans.append(span) - assert doc.labeled_spans.resolve() == [ - ( - "sentence", - "Loren ipsun dolor sit anet, consectetur adipisci elit, sed eiusnod tenpor incidunt ut " - "labore et dolore nagna aliqua.", - ), - ( - "sentence", - "Ut enin ad ninin venian, quis nostrun exercitationen ullan corporis suscipit laboriosan, " - "nisi ut aliquid ex ea connodi consequatur.", - ), - ( - "sentence", - "Quis aute iure reprehenderit in voluptate velit esse cillun dolore eu fugiat nulla pariatur.", - ), - ( - "sentence", - "Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt nollit " - "anin id est laborun.", - ), - 
] - - rel_start = BinaryRelation( - head=doc.labeled_spans[0], tail=doc.labeled_spans[2], label="relation", score=1.0 - ) - doc.binary_relations.append(rel_start) - rel_end = BinaryRelation( - head=doc.labeled_spans[-1], tail=doc.labeled_spans[0], label="relation", score=1.0 - ) - doc.binary_relations.append(rel_end) - rel_consecutive = BinaryRelation( - head=doc.labeled_spans[2], tail=doc.labeled_spans[3], label="relation", score=1.0 - ) - doc.binary_relations.append(rel_consecutive) - - # test document where everything is already included in one argument frame - doc2 = TextDocumentWithLabeledSpansAndBinaryRelations("A founded B.", id="123") - doc2.labeled_spans.append(LabeledSpan(start=0, end=1, label="PER")) - doc2.labeled_spans.append(LabeledSpan(start=10, end=11, label="PER")) - assert doc2.labeled_spans.resolve() == [("PER", "A"), ("PER", "B")] - rel = BinaryRelation(head=doc2.labeled_spans[0], tail=doc2.labeled_spans[1], label="relation") - doc2.binary_relations.append(rel) - - taskmodule.prepare([doc, doc2]) - encoded = taskmodule.encode_input(doc) - - decoded_arg_start = taskmodule.tokenizer.decode(encoded[0].inputs["input_ids"]) - decoded_arg_end = taskmodule.tokenizer.decode(encoded[1].inputs["input_ids"]) - decoded_arg_consecutive = taskmodule.tokenizer.decode(encoded[2].inputs["input_ids"]) - - assert ( - decoded_arg_start - == "[CLS] [H] Loren ipsun dolor sit anet, consectetur adipisci elit, sed eiusnod tenpor incidunt ut labore et dolore nagna aliqua. [/H] Ut enin ad ninin venian, quis no [SEP] ex ea connodi consequatur. [T] Quis aute iure reprehenderit in voluptate velit esse cillun dolore eu fugiat nulla pariatur. [/T] Excepteur sint obcaecat cupid [SEP]" - ) - - assert ( - decoded_arg_end - == "[CLS] [T] Loren ipsun dolor sit anet, consectetur adipisci elit, sed eiusnod tenpor incidunt ut labore et dolore nagna aliqua. [/T] Ut enin ad ninin venian, quis no [SEP]se cillun dolore eu fugiat nulla pariatur. 
[H] Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt nollit anin id est laborun. [/H] [SEP]" - ) - - assert ( - decoded_arg_consecutive - == "[CLS] ex ea connodi consequatur. [H] Quis aute iure reprehenderit in voluptate velit esse cillun dolore eu fugiat nulla pariatur. [/H] [T] Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt nollit anin id est laborun. [/T] [SEP]" - ) - - encoded2 = taskmodule.encode_input(doc2) - assert len(encoded2) == 1 - decoded2 = taskmodule.tokenizer.decode(encoded2[0].inputs["input_ids"]) - assert decoded2 == "[CLS] [H] A [/H] founded [T] B [/T]. [SEP]" - - -def get_arg_token_span( - tokens: List[str], - start_indices: List[int], - end_indices: List[int], - argument_role2idx: Dict[str, int], -) -> Dict[str, List[str]]: - return { - role: tokens[start_indices[argument_role2idx[role]] : end_indices[argument_role2idx[role]]] - for role, idx in argument_role2idx.items() - } - - -def test_encode_with_add_argument_indices(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_argument_indices_to_input=True, - ) - - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents, encode_target=True) - assert len(encodings) == 7 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - arg_spans = [ - get_arg_token_span( - current_tokens, - current_start_indices, - current_end_indices, - taskmodule.argument_role2idx, - ) - for current_tokens, current_start_indices, current_end_indices in zip( - tokens, inputs["pooler_start_indices"].tolist(), inputs["pooler_end_indices"].tolist() - ) - ] - - assert arg_spans == [ - {"head": ["En", "##ti", "##ty", "A"], "tail": 
["B"]}, - {"head": ["En", "##ti", "##ty", "G"], "tail": ["H"]}, - {"head": ["En", "##ti", "##ty", "G"], "tail": ["I"]}, - {"head": ["I"], "tail": ["H"]}, - {"head": ["En", "##ti", "##ty", "M"], "tail": ["N"]}, - {"head": ["it"], "tail": ["O"]}, - {"head": ["O"], "tail": ["it"]}, - ] - - -def test_encode_with_add_argument_indices_and_without_insert_markers(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_argument_indices_to_input=True, - insert_markers=False, - ) - - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents, encode_target=True) - assert len(encodings) == 7 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - arg_spans = [ - get_arg_token_span( - current_tokens, - current_start_indices, - current_end_indices, - taskmodule.argument_role2idx, - ) - for current_tokens, current_start_indices, current_end_indices in zip( - tokens, inputs["pooler_start_indices"].tolist(), inputs["pooler_end_indices"].tolist() - ) - ] - - assert arg_spans == [ - {"head": ["En", "##ti", "##ty", "A"], "tail": ["B"]}, - {"head": ["En", "##ti", "##ty", "G"], "tail": ["H"]}, - {"head": ["En", "##ti", "##ty", "G"], "tail": ["I"]}, - {"head": ["I"], "tail": ["H"]}, - {"head": ["En", "##ti", "##ty", "M"], "tail": ["N"]}, - {"head": ["it"], "tail": ["O"]}, - {"head": ["O"], "tail": ["it"]}, - ] - - -def test_find_sublist(): - # default case - assert find_sublist(sub=[2, 3], bigger=[1, 2, 3, 4]) == 1 - # no sublist - assert find_sublist(sub=[2, 3], bigger=[1, 3, 2, 4]) == -1 - # empty sublist: occurs on every position, but first is returned - assert find_sublist(sub=[], bigger=[1, 3, 2, 4]) == 0 - # empty bigger - assert find_sublist(sub=[2, 3], 
bigger=[]) == -1 - - -def test_encode_with_add_argument_indices_and_windowing(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_argument_indices_to_input=True, - max_window=12, - ) - - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents, encode_target=True) - assert len(encodings) == 3 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - arg_spans = [ - get_arg_token_span( - current_tokens, - current_start_indices, - current_end_indices, - taskmodule.argument_role2idx, - ) - for current_tokens, current_start_indices, current_end_indices in zip( - tokens, inputs["pooler_start_indices"].tolist(), inputs["pooler_end_indices"].tolist() - ) - ] - - assert arg_spans == [ - {"head": ["I"], "tail": ["H"]}, - {"head": ["it"], "tail": ["O"]}, - {"head": ["O"], "tail": ["it"]}, - ] - - -def test_encode_with_add_argument_indices_windowing_and_without_insert_markers(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_argument_indices_to_input=True, - max_window=8, - insert_markers=False, - ) - - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents, encode_target=True) - assert len(encodings) == 3 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - arg_spans = [ - get_arg_token_span( - current_tokens, - current_start_indices, - current_end_indices, - taskmodule.argument_role2idx, - ) - for 
current_tokens, current_start_indices, current_end_indices in zip( - tokens, inputs["pooler_start_indices"].tolist(), inputs["pooler_end_indices"].tolist() - ) - ] - - assert arg_spans == [ - {"head": ["I"], "tail": ["H"]}, - {"head": ["it"], "tail": ["O"]}, - {"head": ["O"], "tail": ["it"]}, - ] - - -@pytest.mark.parametrize("handle_relations_with_same_arguments", ["keep_first", "keep_none"]) -@pytest.mark.parametrize("add_candidate_relations", [False, True]) -@pytest.mark.parametrize("collect_statistics", [False, True]) -def test_encode_input_multiple_relations_for_same_arguments( - caplog, handle_relations_with_same_arguments, add_candidate_relations, collect_statistics -): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - handle_relations_with_same_arguments=handle_relations_with_same_arguments, - collect_statistics=collect_statistics, - add_candidate_relations=add_candidate_relations, - ) - document = TestDocument(text="A founded B.", id="test_doc") - document.entities.append(LabeledSpan(start=0, end=1, label="PER")) - document.entities.append(LabeledSpan(start=10, end=11, label="PER")) - entities = document.entities - assert str(entities[0]) == "A" - assert str(entities[1]) == "B" - document.relations.extend( - [ - BinaryRelation(head=entities[0], tail=entities[1], label="per:founded_by"), - BinaryRelation(head=entities[0], tail=entities[1], label="per:founder"), - BinaryRelation(head=entities[0], tail=entities[1], label="per:founded_by"), - ] - ) - taskmodule.prepare([document]) - - with caplog.at_level(logging.WARNING): - encodings = taskmodule.encode_input(document) - - statistics = taskmodule.get_statistics() - candidate_relation = [enc.metadata["candidate_annotation"] for enc in encodings] - candidate_relation_tuples = [ - (rel.head.resolve(), rel.label, rel.tail.resolve()) for rel in candidate_relation - ] - - if handle_relations_with_same_arguments == "keep_first": 
- # Note: Warnings are shown only if statistics are disabled. For details see comment at - # src/pie_modules/taskmodules/re_text_classification_with_indices.py:811-818 - expected_warning = ( - "doc.id=test_doc: there are multiple relations with the same arguments " - "(('head', ('PER', 'A')), ('tail', ('PER', 'B'))), but different labels: " - "['per:founded_by', 'per:founder', 'per:founded_by']. We only keep the first " - "occurring relation which has the label='per:founded_by'." - ) - if not add_candidate_relations: - # with 'keep_first', only first relation occurred is kept ('per:founded_by'). - # full duplicate of 'per:founded_by' is removed and appears neither as available, - # nor as skipped in statistics. - assert candidate_relation_tuples == [(("PER", "A"), "per:founded_by", ("PER", "B"))] - if collect_statistics: - assert statistics == { - ("available", "per:founded_by"): 1, - ("available", "per:founder"): 1, - ("skipped_same_arguments", "per:founder"): 1, - ("used", "per:founded_by"): 1, - } - assert caplog.messages == [] - else: - assert statistics == {} - assert caplog.messages == [expected_warning] - - else: - # as above, but with candidate (negative) relations added - assert candidate_relation_tuples == [ - (("PER", "A"), "per:founded_by", ("PER", "B")), - (("PER", "B"), "no_relation", ("PER", "A")), - ] - if collect_statistics: - assert statistics == { - ("available", "per:founded_by"): 1, - ("available", "per:founder"): 1, - ("used", "no_relation"): 1, - ("used", "per:founded_by"): 1, - ("skipped_same_arguments", "per:founder"): 1, - } - assert caplog.messages == [] - else: - assert statistics == {} - assert caplog.messages == [expected_warning] - - elif handle_relations_with_same_arguments == "keep_none": - # Note: Warnings are shown only if statistics are disabled. 
For details see comment at - # src/pie_modules/taskmodules/re_text_classification_with_indices.py:811-818 - expected_warning = ( - "doc.id=test_doc: there are multiple relations with the same arguments " - "(('head', ('PER', 'A')), ('tail', ('PER', 'B'))), but different labels: " - "['per:founded_by', 'per:founder', 'per:founded_by']. All relations will be removed." - ) - if not add_candidate_relations: - # with 'keep_none' both relations sharing same arguments are removed - # full duplicate of 'per:founded_by' is removed and appears neither as available, - # nor as skipped in statistics. - assert candidate_relation_tuples == [] - if collect_statistics: - assert statistics == { - ("available", "per:founded_by"): 1, - ("available", "per:founder"): 1, - ("skipped_same_arguments", "per:founder"): 1, - ("skipped_same_arguments", "per:founded_by"): 1, - } - assert caplog.messages == [] - else: - assert statistics == {} - assert caplog.messages == [expected_warning] - else: - # all conflicting relations go into the same direction, so we can create a candidate (negative) - # relation for the other direction. 
- assert candidate_relation_tuples == [(("PER", "B"), "no_relation", ("PER", "A"))] - if collect_statistics: - assert statistics == { - ("available", "per:founded_by"): 1, - ("available", "per:founder"): 1, - ("skipped_same_arguments", "per:founded_by"): 1, - ("skipped_same_arguments", "per:founder"): 1, - ("used", "no_relation"): 1, - } - assert caplog.messages == [] - else: - assert statistics == {} - assert caplog.messages == [expected_warning] - - -def test_encode_input_handle_relations_with_same_arguments_unknown_value(): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - handle_relations_with_same_arguments="unknown_value", - ) - document = TestDocument(text="A founded B.", id="test_doc") - document.entities.append(LabeledSpan(start=0, end=1, label="PER")) - document.entities.append(LabeledSpan(start=10, end=11, label="PER")) - document.relations.append( - BinaryRelation( - head=document.entities[0], tail=document.entities[1], label="per:founded_by" - ) - ) - document.relations.append( - BinaryRelation(head=document.entities[0], tail=document.entities[1], label="per:founder") - ) - assert document.relations.resolve() == [ - ("per:founded_by", (("PER", "A"), ("PER", "B"))), - ("per:founder", (("PER", "A"), ("PER", "B"))), - ] - taskmodule.prepare([document]) - - with pytest.raises(ValueError) as excinfo: - taskmodule.encode_input(document) - assert str(excinfo.value) == ( - "'handle_relations_with_same_arguments' must be 'keep_first' or 'keep_none', but got `unknown_value`." 
- ) - - -@pytest.mark.parametrize("handle_relations_with_same_arguments", ["keep_first", "keep_none"]) -@pytest.mark.parametrize("add_candidate_relations", [False, True]) -@pytest.mark.parametrize("collect_statistics", [False, True]) -def test_encode_input_duplicated_relations( - caplog, handle_relations_with_same_arguments, add_candidate_relations, collect_statistics -): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - handle_relations_with_same_arguments=handle_relations_with_same_arguments, - add_candidate_relations=add_candidate_relations, - collect_statistics=collect_statistics, - ) - document = TestDocument(text="A founded B.", id="test_doc") - document.entities.append(LabeledSpan(start=0, end=1, label="PER")) - document.entities.append(LabeledSpan(start=10, end=11, label="PER")) - entities = document.entities - assert str(entities[0]) == "A" - assert str(entities[1]) == "B" - document.relations.extend( - [ - BinaryRelation(head=entities[0], tail=entities[1], label="per:founded_by"), - BinaryRelation(head=entities[0], tail=entities[1], label="per:founded_by"), - ] - ) - taskmodule.prepare([document]) - with caplog.at_level(logging.WARNING): - encodings = taskmodule.encode_input(document) - - statistics = taskmodule.get_statistics() - - assert len(caplog.messages) == 1 - assert ( - caplog.messages[0] == "doc.id=test_doc: Relation annotation " - "`('per:founded_by', (('PER', 'A'), ('PER', 'B')))` is duplicated. We keep " - "only one of them. Duplicate won't appear in statistics either as 'available' or as skipped." - ) - candidate_relation = [enc.metadata["candidate_annotation"] for enc in encodings] - candidate_relation_tuples = [ - (rel.head.resolve(), rel.label, rel.tail.resolve()) for rel in candidate_relation - ] - # equally for 'keep_first' and 'keep_last', full duplicates are not affected and do not appear in statistics, but still - # generate a warning. 
- if add_candidate_relations: - assert candidate_relation_tuples == [ - (("PER", "A"), "per:founded_by", ("PER", "B")), - (("PER", "B"), "no_relation", ("PER", "A")), - ] - if collect_statistics: - assert statistics == { - ("available", "per:founded_by"): 1, - ("used", "no_relation"): 1, - ("used", "per:founded_by"): 1, - } - else: - assert statistics == {} - else: - assert candidate_relation_tuples == [(("PER", "A"), "per:founded_by", ("PER", "B"))] - if collect_statistics: - assert statistics == { - ("available", "per:founded_by"): 1, - ("used", "per:founded_by"): 1, - } - else: - assert statistics == {} - - -def test_encode_input_argument_role_unknown(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - # the tail argument is not in the role_to_marker - argument_role_to_marker={HEAD: "H"}, - ) - taskmodule.prepare(documents) - with pytest.raises(ValueError) as excinfo: - taskmodule.encode_input(documents[1]) - assert ( - str(excinfo.value) == "role='tail' not in known roles=['head'] (did you initialise the " - "taskmodule with the correct argument_role_to_marker dictionary?)" - ) - - -def test_encode_input_with_add_candidate_relations(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_candidate_relations=True, - ) - taskmodule.prepare(documents) - documents_without_relations = [] - encodings = [] - # just take the first three documents - for doc in documents[:3]: - doc_without_relations = doc.copy() - relations = list(doc_without_relations.relations) - doc_without_relations.relations.clear() - # re-add one relation to test if it is kept - if len(relations) > 0: - doc_without_relations.relations.append(relations[0]) - documents_without_relations.append(doc_without_relations) - encodings.extend(taskmodule.encode(doc_without_relations)) - - assert len(encodings) == 4 
- relations = [encoding.metadata["candidate_annotation"] for encoding in encodings] - texts = [encoding.document.text for encoding in encodings] - relation_tuples = [(str(rel.head), rel.label, str(rel.tail)) for rel in relations] - - # There are no entities in the first document, so there are no created relation candidates - - # this relation was kept - assert texts[0] == "Entity A works at B." - assert relation_tuples[0] == ("Entity A", "per:employee_of", "B") - - # the following relations were added - assert texts[1] == "Entity A works at B." - assert relation_tuples[1] == ("B", "no_relation", "Entity A") - assert texts[2] == "Entity C and D." - assert relation_tuples[2] == ("Entity C", "no_relation", "D") - assert texts[3] == "Entity C and D." - assert relation_tuples[3] == ("D", "no_relation", "Entity C") - - -@pytest.fixture -def document_with_nary_relations(): - @dataclasses.dataclass - class TestDocumentWithNaryRelations(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[NaryRelation] = annotation_field(target="entities") - - document = TestDocumentWithNaryRelations( - text="Entity A works at B.", id="doc_with_nary_relations" - ) - document.entities.append(LabeledSpan(start=0, end=8, label="PER")) - document.entities.append(LabeledSpan(start=18, end=19, label="PER")) - document.relations.append( - NaryRelation( - arguments=tuple(document.entities), - roles=tuple(["head", "tail"]), - label="per:employee_of", - ) - ) - return document - - -def test_encode_input_with_add_candidate_relations_with_wrong_relation_type( - document_with_nary_relations, -): - doc = document_with_nary_relations - - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_candidate_relations=True, - argument_role_to_marker={HEAD: "H", "arg2": "T"}, - ) - taskmodule.prepare([doc]) - with pytest.raises(NotImplementedError) as 
excinfo: - taskmodule.encode_input(doc) - assert ( - str(excinfo.value) - == "doc.id=doc_with_nary_relations: the taskmodule does not yet support adding relation candidates " - "with argument roles other than 'head' and 'tail': ['arg2', 'head']" - ) - - -def test_filter_relations_by_argument_type_whitelist(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_candidate_relations=True, - argument_type_whitelist=[["PER", "ORG"], ["ORG", "PER"]], - ) - doc = documents[4] - taskmodule.prepare(documents) - - assert doc.entities.resolve() == [("PER", "Entity G"), ("ORG", "H"), ("ORG", "I")] - assert doc.relations.resolve() == [ - ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), - ("per:founder", (("PER", "Entity G"), ("ORG", "I"))), - ("org:founded_by", (("ORG", "I"), ("ORG", "H"))), - ] - arguments2relation = {} - for rel in doc.relations: - arguments2relation[get_relation_argument_spans_and_roles(rel)] = rel - assert len(arguments2relation) == 3 - - taskmodule._filter_relations_by_argument_type_whitelist(arguments2relation=arguments2relation) - assert len(arguments2relation) == 2 - - relation_tuples = [rel.resolve() for rel in arguments2relation.values()] - assert relation_tuples[0] == ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))) - assert relation_tuples[1] == ("per:founder", (("PER", "Entity G"), ("ORG", "I"))) - - assert ("org:founded_by", (("ORG", "I"), ("ORG", "H"))) not in relation_tuples - - -def test_add_candidate_relations_with_argument_type_whitelist(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_candidate_relations=True, - argument_type_whitelist=[["PER", "ORG"], ["ORG", "PER"]], - ) - doc = documents[4] - taskmodule.prepare(documents) - - assert doc.entities.resolve() == [("PER", "Entity G"), ("ORG", "H"), ("ORG", "I")] - assert 
doc.relations.resolve() == [ - ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), - ("per:founder", (("PER", "Entity G"), ("ORG", "I"))), - ("org:founded_by", (("ORG", "I"), ("ORG", "H"))), - ] - arguments2relation = {} - for rel in doc.relations: - arguments2relation[get_relation_argument_spans_and_roles(rel)] = rel - assert len(arguments2relation) == 3 - - taskmodule._add_candidate_relations( - arguments2relation=arguments2relation, entities=doc.entities - ) - assert len(arguments2relation) == 5 - - relation_tuples = [rel.resolve() for rel in arguments2relation.values()] - - # Original relations from document (aren't affected by whitelist) - assert relation_tuples[0] == ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))) - assert relation_tuples[1] == ("per:founder", (("PER", "Entity G"), ("ORG", "I"))) - assert relation_tuples[2] == ("org:founded_by", (("ORG", "I"), ("ORG", "H"))) - - # Relation candidate added by _add_candidate_relations() - assert relation_tuples[3] == ("no_relation", (("ORG", "H"), ("PER", "Entity G"))) - assert relation_tuples[4] == ("no_relation", (("ORG", "I"), ("PER", "Entity G"))) - - # Relations not created due to whitelist - assert ("no_relation", (("ORG", "H"), ("ORG", "I"))) not in relation_tuples - - -def test_filter_relations_by_argument_and_relation_type_whitelist(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_candidate_relations=True, - argument_and_relation_type_whitelist={ - "per:employee_of": [["PER", "ORG"]], - "per:founder": [["PER", "ORG"]], - "org:founded_by": [["ORG", "PER"]], - }, - ) - doc = documents[4] - taskmodule.prepare(documents) - - assert doc.entities.resolve() == [("PER", "Entity G"), ("ORG", "H"), ("ORG", "I")] - assert doc.relations.resolve() == [ - ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), - ("per:founder", (("PER", "Entity G"), ("ORG", "I"))), - ("org:founded_by", (("ORG", 
"I"), ("ORG", "H"))), - ] - arguments2relation = {} - for rel in doc.relations: - arguments2relation[get_relation_argument_spans_and_roles(rel)] = rel - assert len(arguments2relation) == 3 - - taskmodule._filter_relations_by_argument_and_relation_type_whitelist( - arguments2relation=arguments2relation - ) - assert len(arguments2relation) == 2 - - relation_tuples = [rel.resolve() for rel in arguments2relation.values()] - assert relation_tuples[0] == ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))) - assert relation_tuples[1] == ("per:founder", (("PER", "Entity G"), ("ORG", "I"))) - - assert ("org:founded_by", (("ORG", "I"), ("ORG", "H"))) not in relation_tuples - - -def test_add_candidate_relations_with_argument_and_relation_type_whitelist(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_candidate_relations=True, - argument_and_relation_type_whitelist={ - "per:employee_of": [["PER", "ORG"]], - "per:founder": [["PER", "ORG"]], - "org:founded_by": [["ORG", "PER"]], - }, - ) - doc = documents[4] - taskmodule.prepare(documents) - - assert doc.entities.resolve() == [("PER", "Entity G"), ("ORG", "H"), ("ORG", "I")] - assert doc.relations.resolve() == [ - ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), - ("per:founder", (("PER", "Entity G"), ("ORG", "I"))), - ("org:founded_by", (("ORG", "I"), ("ORG", "H"))), - ] - arguments2relation = {} - for rel in doc.relations: - arguments2relation[get_relation_argument_spans_and_roles(rel)] = rel - assert len(arguments2relation) == 3 - - taskmodule._add_candidate_relations( - arguments2relation=arguments2relation, entities=doc.entities - ) - assert len(arguments2relation) == 5 - - relation_tuples = [rel.resolve() for rel in arguments2relation.values()] - - # Original relations from document (aren't affected by whitelist) - assert relation_tuples[0] == ("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))) - 
assert relation_tuples[1] == ("per:founder", (("PER", "Entity G"), ("ORG", "I"))) - assert relation_tuples[2] == ("org:founded_by", (("ORG", "I"), ("ORG", "H"))) - - # Relation candidate added by _add_candidate_relations() - assert relation_tuples[3] == ("no_relation", (("ORG", "H"), ("PER", "Entity G"))) - assert relation_tuples[4] == ("no_relation", (("ORG", "I"), ("PER", "Entity G"))) - - # Relations not created due to whitelist - assert ("no_relation", (("ORG", "H"), ("ORG", "I"))) not in relation_tuples - - -def test_encode_input_with_add_reversed_relations(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_reversed_relations=True, - ) - taskmodule.prepare(documents) - encodings = [] - # just take the first three documents - for doc in documents[:3]: - encodings.extend(taskmodule.encode_input(doc)) - - assert len(encodings) == 2 - texts = [encoding.document.text for encoding in encodings] - relations = [encoding.metadata["candidate_annotation"] for encoding in encodings] - relation_tuples = [(str(rel.head), rel.label, str(rel.tail)) for rel in relations] - - # There are no relations in the first and last document, so there are also no new reversed relations - - # this is the original relation - assert texts[0] == "Entity A works at B." - assert relation_tuples[0] == ("Entity A", "per:employee_of", "B") - - # this is the reversed relation - assert texts[1] == "Entity A works at B." 
- assert relation_tuples[1] == ("B", "per:employee_of_reversed", "Entity A") - - # test that an already reversed relation is not reversed again - document = TestDocument( - text="Entity A works at B.", id="doc_with_relation_with_reversed_suffix" - ) - document.entities.extend( - [LabeledSpan(start=0, end=8, label="PER"), LabeledSpan(start=18, end=19, label="PER")] - ) - document.relations.append( - BinaryRelation( - head=document.entities[1], - tail=document.entities[0], - label=f"per:employee_of{taskmodule.reversed_relation_label_suffix}", - ) - ) - with pytest.raises(ValueError) as excinfo: - taskmodule.encode_input(document) - assert str(excinfo.value) == ( - "doc.id=doc_with_relation_with_reversed_suffix: The relation has the label 'per:employee_of_reversed' " - "which already ends with the reversed_relation_label_suffix='_reversed'. It looks like the relation is " - "already reversed, which is not allowed." - ) - - -def test_prepare_with_add_reversed_relations_with_label_has_suffix(): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_reversed_relations=True, - ) - document = TestDocument( - text="Entity A works at B.", id="doc_with_relation_with_reversed_suffix" - ) - document.entities.extend( - [LabeledSpan(start=0, end=8, label="PER"), LabeledSpan(start=18, end=19, label="PER")] - ) - document.relations.append( - BinaryRelation( - head=document.entities[0], - tail=document.entities[1], - label=f"per:employee_of{taskmodule.reversed_relation_label_suffix}", - ) - ) - - with pytest.raises(ValueError) as excinfo: - taskmodule.prepare([document]) - assert ( - str(excinfo.value) - == "doc.id=doc_with_relation_with_reversed_suffix: the relation label 'per:employee_of_reversed' " - "already ends with the reversed_relation_label_suffix '_reversed', this is not allowed because " - "we would not know if we should strip the 
suffix and revert the arguments during inference or not" - ) - - -@pytest.mark.parametrize("reverse_symmetric_relations", [False, True]) -def test_encode_input_with_add_reversed_relations_with_symmetric_relations( - reverse_symmetric_relations, caplog -): - document = TestDocument( - text="Entity A is married with B, but likes C, who is married with D.", - id="doc_with_symmetric_relation", - ) - document.entities.extend( - [ - LabeledSpan(start=0, end=8, label="PER"), - LabeledSpan(start=25, end=26, label="PER"), - LabeledSpan(start=38, end=39, label="PER"), - LabeledSpan(start=61, end=62, label="PER"), - ] - ) - assert str(document.entities[0]) == "Entity A" - assert str(document.entities[1]) == "B" - assert str(document.entities[2]) == "C" - assert str(document.entities[3]) == "D" - document.relations.extend( - [ - BinaryRelation( - head=document.entities[0], tail=document.entities[1], label="per:is_married_with" - ), - BinaryRelation( - head=document.entities[0], tail=document.entities[2], label="per:likes" - ), - BinaryRelation( - head=document.entities[2], tail=document.entities[3], label="per:is_married_with" - ), - BinaryRelation( - head=document.entities[3], tail=document.entities[2], label="per:is_married_with" - ), - ] - ) - - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_reversed_relations=True, - symmetric_relations=["per:is_married_with"], - reverse_symmetric_relations=reverse_symmetric_relations, - ) - taskmodule.prepare([document]) - encodings = taskmodule.encode_input(document) - relations = [encoding.metadata["candidate_annotation"] for encoding in encodings] - relation_tuples = [ - (str(relation.head), relation.label, str(relation.tail)) for relation in relations - ] - if reverse_symmetric_relations: - assert relation_tuples == [ - ("Entity A", "per:is_married_with", "B"), - ("Entity A", "per:likes", 
"C"), - ("C", "per:is_married_with", "D"), - ("D", "per:is_married_with", "C"), - ("B", "per:is_married_with", "Entity A"), - ("C", "per:likes_reversed", "Entity A"), - ] - assert len(caplog.messages) == 2 - assert ( - caplog.messages[0] - == "doc.id=doc_with_symmetric_relation: there is already a relation with reversed " - "arguments=(('head', LabeledSpan(start=61, end=62, label='PER', score=1.0)), " - "('tail', LabeledSpan(start=38, end=39, label='PER', score=1.0))) and label=per:is_married_with, " - "so we do not add the reversed relation (with label per:is_married_with) for these arguments" - ) - assert ( - caplog.messages[1] - == "doc.id=doc_with_symmetric_relation: there is already a relation with reversed " - "arguments=(('head', LabeledSpan(start=38, end=39, label='PER', score=1.0)), " - "('tail', LabeledSpan(start=61, end=62, label='PER', score=1.0))) and label=per:is_married_with, " - "so we do not add the reversed relation (with label per:is_married_with) for these arguments" - ) - else: - assert relation_tuples == [ - ("Entity A", "per:is_married_with", "B"), - ("Entity A", "per:likes", "C"), - ("C", "per:is_married_with", "D"), - ("D", "per:is_married_with", "C"), - ("C", "per:likes_reversed", "Entity A"), - ] - assert len(caplog.messages) == 0 - - caplog.clear() - document = TestDocument( - text="Entity A is married with B.", - id="doc_with_reversed_symmetric_relation", - ) - document.entities.append(LabeledSpan(start=0, end=8, label="PER")) - document.entities.append(LabeledSpan(start=25, end=26, label="PER")) - document.relations.append( - BinaryRelation( - head=document.entities[1], tail=document.entities[0], label="per:is_married_with" - ) - ) - encodings = taskmodule.encode_input(document) - relations = [encoding.metadata["candidate_annotation"] for encoding in encodings] - relation_tuples = [ - (str(relation.head), relation.label, str(relation.tail)) for relation in relations - ] - if reverse_symmetric_relations: - assert len(relation_tuples) == 
2 - assert relation_tuples[0] == ("B", "per:is_married_with", "Entity A") - assert relation_tuples[1] == ("Entity A", "per:is_married_with", "B") - assert len(caplog.messages) == 1 - assert ( - caplog.messages[0] - == "doc.id=doc_with_reversed_symmetric_relation: The symmetric relation with label 'per:is_married_with' " - "has arguments (('head', LabeledSpan(start=25, end=26, label='PER', score=1.0)), " - "('tail', LabeledSpan(start=0, end=8, label='PER', score=1.0))) which are not sorted by their start " - "and end positions. This may lead to problems during evaluation because we assume that the arguments " - "of symmetric relations were sorted in the beginning and, thus, interpret relations where this is not " - "the case as reversed. All reversed relations will get their arguments swapped during inference in " - "the case of add_reversed_relations=True to remove duplicates. You may consider adding reversed " - "versions of the *symmetric* relations on your own and then setting *reverse_symmetric_relations* " - "to False." 
- ) - else: - assert len(relation_tuples) == 1 - assert relation_tuples[0] == ("B", "per:is_married_with", "Entity A") - assert len(caplog.messages) == 0 - - -def test_encode_input_with_add_reversed_relations_with_wrong_relation_type( - document_with_nary_relations, -): - doc = document_with_nary_relations - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - add_reversed_relations=True, - symmetric_relations=["per:employee_of"], - ) - taskmodule.prepare([doc]) - with pytest.raises(NotImplementedError) as excinfo: - taskmodule.encode_input(doc) - assert ( - str(excinfo.value) - == "doc.id=doc_with_nary_relations: the taskmodule does not yet support adding " - "reversed relations for type: " - ) - - -def test_inner_span_distance_overlap(): - dist = distance_inner((0, 2), (1, 3)) - assert dist == -1 - - -def test_span_distance_unknown_type(): - with pytest.raises(ValueError) as excinfo: - span_distance((0, 1), (2, 3), "unknown") - assert str(excinfo.value) == "unknown distance_type=unknown. 
use one of: center, inner, outer" - - -def test_encode_input_with_max_argument_distance(): - document = TestDocument( - text="Entity A works at B and C.", id="doc_with_three_entities_and_two_relations" - ) - e0 = LabeledSpan(start=0, end=8, label="PER") - e1 = LabeledSpan(start=18, end=19, label="PER") - e2 = LabeledSpan(start=24, end=25, label="PER") - document.entities.extend([e0, e1, e2]) - assert str(document.entities[0]) == "Entity A" - assert str(document.entities[1]) == "B" - assert str(document.entities[2]) == "C" - document.relations.append( - BinaryRelation( - head=document.entities[0], tail=document.entities[1], label="per:employee_of" - ) - ) - document.relations.append( - BinaryRelation( - head=document.entities[0], tail=document.entities[2], label="per:employee_of" - ) - ) - dist_01 = span_distance((e0.start, e0.end), (e1.start, e1.end), "inner") - dist_02 = span_distance((e0.start, e0.end), (e2.start, e2.end), "inner") - assert dist_01 == 10 - assert dist_02 == 16 - - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - max_argument_distance=10, - ) - taskmodule.prepare([document]) - encodings = taskmodule.encode_input(document) - - # there are two relations, but only one is within the max_argument_distance - assert len(encodings) == 1 - relation = encodings[0].metadata["candidate_annotation"] - assert str(relation.head) == "Entity A" - assert str(relation.tail) == "B" - assert relation.label == "per:employee_of" - - -def test_encode_input_with_max_argument_distance_with_wrong_relation_type( - document_with_nary_relations, -): - doc = document_with_nary_relations - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - max_argument_distance=10, - ) - taskmodule.prepare([doc]) - with pytest.raises(NotImplementedError) as excinfo: - 
encodings = taskmodule.encode_input(doc) - assert ( - str(excinfo.value) - == "doc.id=doc_with_nary_relations: the taskmodule does not yet support filtering " - "relation candidates for type: " - ) - - -@pytest.mark.parametrize("distance_type", ["inner", "outer", "unknown"]) -def test_encode_input_with_max_argument_distance_tokens(distance_type): - document = TestDocument( - text="Entity A works at B and C.", id="doc_with_three_entities_and_two_relations" - ) - e0 = LabeledSpan(start=0, end=8, label="PER") - e1 = LabeledSpan(start=18, end=19, label="PER") - e2 = LabeledSpan(start=24, end=25, label="PER") - document.entities.extend([e0, e1, e2]) - assert str(document.entities[0]) == "Entity A" - assert str(document.entities[1]) == "B" - assert str(document.entities[2]) == "C" - document.relations.append( - BinaryRelation( - head=document.entities[0], tail=document.entities[1], label="per:employee_of" - ) - ) - document.relations.append( - BinaryRelation( - head=document.entities[0], tail=document.entities[2], label="per:employee_of" - ) - ) - dist_01 = span_distance((e0.start, e0.end), (e1.start, e1.end), "inner") - dist_02 = span_distance((e0.start, e0.end), (e2.start, e2.end), "inner") - assert dist_01 == 10 - assert dist_02 == 16 - - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - max_argument_distance_tokens=3 if distance_type == "inner" else 8, - max_argument_distance_type_tokens=distance_type, - ) - taskmodule.prepare([document]) - if distance_type == "unknown": - with pytest.raises(ValueError) as excinfo: - taskmodule.encode_input(document) - assert ( - str(excinfo.value) == "unknown distance_type=unknown. 
use one of: center, inner, outer" - ) - return - - encodings = taskmodule.encode_input(document) - - # there are two relations, but only one is within the max_argument_distance - assert len(encodings) == 1 - encoding = encodings[0] - tokens = taskmodule.tokenizer.convert_ids_to_tokens(encoding.inputs["input_ids"]) - assert tokens == [ - "[CLS]", - "[H]", - "En", - "##ti", - "##ty", - "A", - "[/H]", - "works", - "at", - "[T]", - "B", - "[/T]", - "and", - "C", - ".", - "[SEP]", - ] - head_start = tokens.index("[H]") + 1 - head_end = tokens.index("[/H]") - tail_start = tokens.index("[T]") + 1 - tail_end = tokens.index("[/T]") - assert (head_start, head_end, tail_start, tail_end) == (2, 6, 10, 11) - # subtract 2 for the special marker tokens [/H] and [T] - inner_dist = tail_start - head_end - 2 - assert inner_dist == 2 - # subtract 2 for the special marker tokens [H] and [/T] - outer_dist = tail_end - head_start - 2 - assert outer_dist == 7 - - relation = encodings[0].metadata["candidate_annotation"] - assert str(relation.head) == "Entity A" - assert str(relation.tail) == "B" - assert relation.label == "per:employee_of" - - -def test_encode_input_with_unknown_label(): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - labels=["rel"], - entity_labels=["a", "b"], - collect_statistics=True, - ) - taskmodule.post_prepare() - - doc = TestDocument(text="hello world", id="doc_with_unknown_label") - doc.entities.append(LabeledSpan(start=0, end=5, label="a")) - doc.entities.append(LabeledSpan(start=6, end=11, label="b")) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="unknown") - ) - - task_encodings = taskmodule.encode_input(doc) - assert len(task_encodings) == 0 - - statistics = taskmodule.get_statistics() - assert statistics == {("available", "unknown"): 1, ("skipped_unknown_label", "unknown"): 1} - - -def 
test_encode_with_empty_partition_layer(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - partition_annotation="sentences", - ) - taskmodule.prepare(documents) - documents_without_sentences = [] - # just take the first three documents - for doc in documents[:3]: - doc_without_sentences = doc.copy() - doc_without_sentences.sentences.clear() - documents_without_sentences.append(doc_without_sentences) - - encodings = taskmodule.encode(documents_without_sentences) - # since there are no sentences, but we use partition_annotation="sentences", - # there are no encodings - assert len(encodings) == 0 - - -def test_encode_nary_relatio(): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - argument_role_to_marker={"r1": "R1", "r2": "R2", "r3": "R3"}, - # setting labels and entity_labels makes the taskmodule prepared - labels=["rel"], - entity_labels=["a", "b", "c"], - ) - taskmodule._post_prepare() - - @dataclass - class DocWithNaryRelation(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[NaryRelation] = annotation_field(target="entities") - - doc = DocWithNaryRelation(text="hello my world") - entity1 = LabeledSpan(start=0, end=5, label="a") - entity2 = LabeledSpan(start=6, end=8, label="b") - entity3 = LabeledSpan(start=9, end=14, label="c") - doc.entities.extend([entity1, entity2, entity3]) - doc.relations.append( - NaryRelation( - arguments=tuple([entity1, entity2, entity3]), - roles=tuple(["r1", "r2", "r3"]), - label="rel", - ) - ) - - task_encodings = taskmodule.encode([doc]) - assert len(task_encodings) == 1 - encoding = task_encodings[0] - assert encoding.document == doc - assert encoding.document.text == "hello 
my world" - rel = encoding.metadata["candidate_annotation"] - assert str(rel.arguments[0]) == "hello" - assert str(rel.arguments[1]) == "my" - assert str(rel.arguments[2]) == "world" - assert rel.label == "rel" - - -def test_encode_unknown_relation_type(): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - # setting labels and entity_labels makes the taskmodule prepared - labels=["has_wrong_type"], - entity_labels=["a"], - ) - taskmodule._post_prepare() - - @dataclass(frozen=True) - class UnknownRelation(Annotation): - arg: LabeledSpan - label: str - - @dataclass - class DocWithUnknownRelationType(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[UnknownRelation] = annotation_field(target="entities") - - doc = DocWithUnknownRelationType(text="hello world") - entity = LabeledSpan(start=0, end=1, label="a") - doc.entities.append(entity) - doc.relations.append(UnknownRelation(arg=entity, label="has_wrong_type")) - - with pytest.raises(NotImplementedError) as excinfo: - taskmodule.encode([doc]) - assert str(excinfo.value).startswith( - "the taskmodule does not yet support getting relation arguments for type: " - ) and str(excinfo.value).endswith(".UnknownRelation'>") - - -def test_encode_with_unaligned_span(caplog): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - # setting v and entity_labels makes the taskmodule prepared - labels=["rel"], - entity_labels=["a"], - ) - taskmodule._post_prepare() - - @dataclass - class MyDocument(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - - doc = 
MyDocument(text="hello space", id="doc1") - entity1 = LabeledSpan(start=0, end=5, label="a") - entity2 = LabeledSpan(start=7, end=13, label="a") - entity3 = LabeledSpan(start=6, end=8, label="a") - doc.entities.extend([entity1, entity2, entity3]) - # the start of entity2 is not aligned with a token, but this will get fixed - assert str(entity2) == " space" - doc.relations.append(BinaryRelation(head=entity1, tail=entity2, label="rel")) - # entity3 can not get fixed because it contains only space - assert str(entity3) == " " - doc.relations.append(BinaryRelation(head=entity1, tail=entity3, label="rel")) - - task_encodings = taskmodule.encode([doc]) - # the second relation is skipped because we can not get an aligned token span for it - assert len(task_encodings) == 1 - task_encoding = task_encodings[0] - tokens = taskmodule.tokenizer.convert_ids_to_tokens(task_encoding.inputs["input_ids"]) - assert tokens == ["[CLS]", "[H]", "hello", "[/H]", "[T]", "space", "[/T]", "[SEP]"] - - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == "WARNING" - assert ( - caplog.messages[0] - == "doc.id=doc1: Skipping invalid example, cannot get argument token slice for LabeledSpan(start=6, end=8, label='a', score=1.0): \" \"" - ) - - -def test_encode_with_log_first_n_examples(caplog): - @dataclass - class DocumentWithLabeledEntitiesAndRelations(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - - doc = DocumentWithLabeledEntitiesAndRelations(text="hello world", id="doc1") - entity1 = LabeledSpan(start=0, end=5, label="a") - entity2 = LabeledSpan(start=6, end=11, label="a") - doc.entities.extend([entity1, entity2]) - doc.relations.append(BinaryRelation(head=entity1, tail=entity2, label="rel")) - - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - 
tokenizer_name_or_path=tokenizer_name_or_path, - log_first_n_examples=1, - ) - taskmodule.prepare([doc]) - - # we need to set the log level to INFO, otherwise the log messages are not captured - with caplog.at_level(logging.INFO): - task_encodings = taskmodule.encode([doc, doc], encode_target=True) - - # the second example is skipped because log_first_n_examples=1 - assert len(task_encodings) == 2 - assert len(caplog.records) == 5 - assert all([record.levelname == "INFO" for record in caplog.records]) - assert caplog.records[0].message == "*** Example ***" - assert caplog.records[1].message == "doc id: doc1" - assert caplog.records[2].message == "tokens: [CLS] [H] hello [/H] [T] world [/T] [SEP]" - assert caplog.records[3].message == "input_ids: 101 28998 19082 28996 28999 1362 28997 102" - assert caplog.records[4].message == "Expected label: ['rel'] (ids = [1])" - - -@pytest.mark.skipif(condition=not _TABULATE_AVAILABLE, reason="requires the 'tabulate' package") -def test_encode_with_collect_statistics(documents): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - collect_statistics=True, - ) - taskmodule.prepare(documents) - task_encodings = taskmodule.encode(documents) - statistics = taskmodule.get_statistics() - assert len(task_encodings) == 7 - - assert statistics == { - ("available", "org:founded_by"): 2, - ("available", "per:employee_of"): 3, - ("available", "per:founder"): 2, - ("used", "org:founded_by"): 2, - ("used", "per:employee_of"): 3, - ("used", "per:founder"): 2, - } - - -def test_get_global_attention(taskmodule, batch, cfg): - global_attention_mask = taskmodule._get_global_attention(input_ids=batch[0]["input_ids"]) - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(token_ids) - for token_ids in batch[0]["input_ids"].tolist() - ] - global_attention_tokens = [ - [tok for tok, m in zip(tkns, glob_attn_mask) if m] - for tkns, glob_attn_mask in zip(tokens, 
global_attention_mask) - ] - pad_tok = taskmodule.tokenizer.pad_token - not_global_attention_tokens = [ - [tok for tok, m in zip(tkns, glob_attn_mask) if not (m or tok == pad_tok)] - for tkns, glob_attn_mask in zip(tokens, global_attention_mask) - ] - if not cfg.get("append_markers", False): - torch.testing.assert_close( - global_attention_mask, - torch.tensor( - [ - [1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], - ] - ), - ) - assert not_global_attention_tokens == [ - ["En", "##ti", "##ty", "A", "works", "at", "B", ".", "[SEP]"], - [ - "First", - "sentence", - ".", - "En", - "##ti", - "##ty", - "G", - "works", - "at", - "H", - ".", - "And", - "founded", - "I", - ".", - "[SEP]", - ], - ] - else: - torch.testing.assert_close( - global_attention_mask, - torch.tensor( - [ - [1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0], - ] - ), - ) - assert not_global_attention_tokens == [ - ["En", "##ti", "##ty", "A", "works", "at", "B", ".", "[SEP]", "[SEP]", "[SEP]"], - [ - "First", - "sentence", - ".", - "En", - "##ti", - "##ty", - "G", - "works", - "at", - "H", - ".", - "And", - "founded", - "I", - ".", - "[SEP]", - "[SEP]", - "[SEP]", - ], - ] - - if cfg == {"add_type_to_marker": False, "append_markers": False}: - assert global_attention_tokens == [ - ["[CLS]", "[H]", "[/H]", "[T]", "[/T]"], - ["[CLS]", "[H]", "[/H]", "[T]", "[/T]"], - ] - elif cfg == {"add_type_to_marker": True, "append_markers": False}: - assert global_attention_tokens == [ - ["[CLS]", "[H:PER]", "[/H:PER]", "[T:ORG]", "[/T:ORG]"], - ["[CLS]", "[H:PER]", "[/H:PER]", "[T:ORG]", "[/T:ORG]"], - ] - elif cfg == {"add_type_to_marker": False, "append_markers": True}: - assert global_attention_tokens == [ - ["[CLS]", "[H]", "[/H]", "[T]", "[/T]", "[H=PER]", "[T=ORG]"], - ["[CLS]", "[H]", "[/H]", "[T]", "[/T]", "[H=PER]", 
"[T=ORG]"], - ] - elif cfg == {"add_type_to_marker": True, "append_markers": True}: - assert global_attention_tokens == [ - ["[CLS]", "[H:PER]", "[/H:PER]", "[T:ORG]", "[/T:ORG]", "[H=PER]", "[T=ORG]"], - ["[CLS]", "[H:PER]", "[/H:PER]", "[T:ORG]", "[/T:ORG]", "[H=PER]", "[T=ORG]"], - ] - else: - raise ValueError(f"unexpected config: {cfg}") - - -def get_metric_state(metric_or_collection: Union[Metric, MetricCollection]) -> Dict[str, Any]: - if isinstance(metric_or_collection, Metric): - return { - k: v.tolist() for k, v in flatten_dict_s(metric_or_collection.metric_state).items() - } - elif isinstance(metric_or_collection, MetricCollection): - return flatten_dict_s({k: get_metric_state(v) for k, v in metric_or_collection.items()}) - else: - raise ValueError(f"unsupported type: {type(metric_or_collection)}") - - -def test_configure_model_metric(documents, taskmodule): - task_encodings = taskmodule.encode(documents, encode_target=True) - batch = taskmodule.collate(task_encodings) - - metric = taskmodule.configure_model_metric(stage="train") - assert isinstance(metric, (Metric, MetricCollection)) - state = get_metric_state(metric) - assert state == { - "micro/f1_without_tn/tp": [0], - "micro/f1_without_tn/fp": [0], - "micro/f1_without_tn/tn": [0], - "micro/f1_without_tn/fn": [0], - "with_tn/f1_per_label/tp": [0, 0, 0, 0], - "with_tn/f1_per_label/fp": [0, 0, 0, 0], - "with_tn/f1_per_label/tn": [0, 0, 0, 0], - "with_tn/f1_per_label/fn": [0, 0, 0, 0], - "with_tn/macro/f1/tp": [0, 0, 0, 0], - "with_tn/macro/f1/fp": [0, 0, 0, 0], - "with_tn/macro/f1/tn": [0, 0, 0, 0], - "with_tn/macro/f1/fn": [0, 0, 0, 0], - "with_tn/micro/f1/tp": [0], - "with_tn/micro/f1/fp": [0], - "with_tn/micro/f1/tn": [0], - "with_tn/micro/f1/fn": [0], - } - assert metric.compute() == { - "no_relation/f1": tensor(0.0), - "org:founded_by/f1": tensor(0.0), - "per:employee_of/f1": tensor(0.0), - "per:founder/f1": tensor(0.0), - "macro/f1": tensor(0.0), - "micro/f1": tensor(0.0), - "micro/f1_without_tn": 
tensor(0.0), - } - - targets = batch[1] - metric.update(targets, targets) - state = get_metric_state(metric) - assert state == { - "micro/f1_without_tn/tp": [7], - "micro/f1_without_tn/fp": [0], - "micro/f1_without_tn/tn": [21], - "micro/f1_without_tn/fn": [0], - "with_tn/f1_per_label/tp": [0, 2, 3, 2], - "with_tn/f1_per_label/fp": [0, 0, 0, 0], - "with_tn/f1_per_label/tn": [7, 5, 4, 5], - "with_tn/f1_per_label/fn": [0, 0, 0, 0], - "with_tn/macro/f1/tp": [0, 2, 3, 2], - "with_tn/macro/f1/fp": [0, 0, 0, 0], - "with_tn/macro/f1/tn": [7, 5, 4, 5], - "with_tn/macro/f1/fn": [0, 0, 0, 0], - "with_tn/micro/f1/tp": [7], - "with_tn/micro/f1/fp": [0], - "with_tn/micro/f1/tn": [21], - "with_tn/micro/f1/fn": [0], - } - assert metric.compute() == { - "no_relation/f1": tensor(0.0), - "org:founded_by/f1": tensor(1.0), - "per:employee_of/f1": tensor(1.0), - "per:founder/f1": tensor(1.0), - "macro/f1": tensor(1.0), - "micro/f1": tensor(1.0), - "micro/f1_without_tn": tensor(1.0), - } - - metric.reset() - modified_targets = {"labels": torch.tensor([2, 2, 3, 1, 2, 0, 1])} - # three positive matches and one true negative - random_predictions = {"labels": torch.tensor([1, 1, 3, 1, 2, 0, 0])} - metric.update(random_predictions, modified_targets) - state = get_metric_state(metric) - assert state == { - "micro/f1_without_tn/tp": [3], - "micro/f1_without_tn/fp": [3], - "micro/f1_without_tn/tn": [15], - "micro/f1_without_tn/fn": [3], - "with_tn/f1_per_label/tp": [1, 1, 1, 1], - "with_tn/f1_per_label/fp": [1, 2, 0, 0], - "with_tn/f1_per_label/tn": [5, 3, 4, 6], - "with_tn/f1_per_label/fn": [0, 1, 2, 0], - "with_tn/macro/f1/tp": [1, 1, 1, 1], - "with_tn/macro/f1/fp": [1, 2, 0, 0], - "with_tn/macro/f1/tn": [5, 3, 4, 6], - "with_tn/macro/f1/fn": [0, 1, 2, 0], - "with_tn/micro/f1/tp": [4], - "with_tn/micro/f1/fp": [3], - "with_tn/micro/f1/tn": [18], - "with_tn/micro/f1/fn": [3], - } - # created with torch.set_printoptions(precision=6) - torch.testing.assert_close( - metric.compute(), - { - 
"no_relation/f1": tensor(0.666667), - "org:founded_by/f1": tensor(0.400000), - "per:employee_of/f1": tensor(0.500000), - "per:founder/f1": tensor(1.0), - "macro/f1": tensor(0.641667), - "micro/f1": tensor(0.571429), - "micro/f1_without_tn": tensor(0.500000), - }, - ) - - # no targets and no predictions - metric.reset() - no_targets = {"labels": torch.tensor([0, 0, 0])} - no_predictions = {"labels": torch.tensor([0, 0, 0])} - metric.update(no_targets, no_predictions) - state = get_metric_state(metric) - - assert state == { - "micro/f1_without_tn/tp": [0], - "micro/f1_without_tn/fp": [0], - "micro/f1_without_tn/tn": [0], - "micro/f1_without_tn/fn": [0], - "with_tn/f1_per_label/tp": [3, 0, 0, 0], - "with_tn/f1_per_label/fp": [0, 0, 0, 0], - "with_tn/f1_per_label/tn": [0, 3, 3, 3], - "with_tn/f1_per_label/fn": [0, 0, 0, 0], - "with_tn/macro/f1/tp": [3, 0, 0, 0], - "with_tn/macro/f1/fp": [0, 0, 0, 0], - "with_tn/macro/f1/tn": [0, 3, 3, 3], - "with_tn/macro/f1/fn": [0, 0, 0, 0], - "with_tn/micro/f1/tp": [3], - "with_tn/micro/f1/fp": [0], - "with_tn/micro/f1/tn": [9], - "with_tn/micro/f1/fn": [0], - } - torch.testing.assert_close( - metric.compute(), - { - "micro/f1_without_tn": tensor(0.0), - "no_relation/f1": tensor(1.0), - "org:founded_by/f1": tensor(0.0), - "per:employee_of/f1": tensor(0.0), - "per:founder/f1": tensor(0.0), - "macro/f1": tensor(1.0), - "micro/f1": tensor(1.0), - }, - ) - - # ensure that the metric can be pickled - pickle.dumps(metric) - - -def get_bio_tag(tag_id: int, idx2label: Dict[int, str]) -> str: - if tag_id == 0: - return "O" - tag_id -= 1 - label = idx2label[tag_id // 2] - if tag_id % 2 == 0: - return f"B-{label}" - else: - return f"I-{label}" - - -def test_encode_without_insert_marker_but_argument_tags(documents): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - insert_markers=False, - 
add_argument_tags_to_input=True, - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - assert len(documents) == 7 - encodings = taskmodule.encode(documents) - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - idx2role = {v: k for k, v in taskmodule.argument_role2idx.items()} - argument_tag_ids = [ - [get_bio_tag(tag_id, idx2role) for tag_id in (argument_tags - 1).tolist() if tag_id >= 0] - for argument_tags in inputs["argument_tags"] - ] - tokens_with_tags = [ - [(tok, tag) for tok, tag in zip(tkns, tags)] - for tkns, tags in zip(tokens, argument_tag_ids) - ] - assert tokens_with_tags == [ - [ - ("[CLS]", "O"), - ("En", "B-head"), - ("##ti", "I-head"), - ("##ty", "I-head"), - ("A", "I-head"), - ("works", "O"), - ("at", "O"), - ("B", "B-tail"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "B-head"), - ("##ti", "I-head"), - ("##ty", "I-head"), - ("G", "I-head"), - ("works", "O"), - ("at", "O"), - ("H", "B-tail"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "O"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "B-head"), - ("##ti", "I-head"), - ("##ty", "I-head"), - ("G", "I-head"), - ("works", "O"), - ("at", "O"), - ("H", "O"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "B-tail"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "O"), - ("##ti", "O"), - ("##ty", "O"), - ("G", "O"), - ("works", "O"), - ("at", "O"), - ("H", "B-tail"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "B-head"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "B-head"), - ("##ti", "I-head"), - ("##ty", 
"I-head"), - ("M", "I-head"), - ("works", "O"), - ("at", "O"), - ("N", "B-tail"), - (".", "O"), - ("And", "O"), - ("it", "O"), - ("founded", "O"), - ("O", "O"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "O"), - ("##ti", "O"), - ("##ty", "O"), - ("M", "O"), - ("works", "O"), - ("at", "O"), - ("N", "O"), - (".", "O"), - ("And", "O"), - ("it", "B-head"), - ("founded", "O"), - ("O", "B-tail"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "O"), - ("##ti", "O"), - ("##ty", "O"), - ("M", "O"), - ("works", "O"), - ("at", "O"), - ("N", "O"), - (".", "O"), - ("And", "O"), - ("it", "B-tail"), - ("founded", "O"), - ("O", "B-head"), - (".", "O"), - ("[SEP]", "O"), - ], - ] - - -@pytest.mark.parametrize("add_argument_indices_to_input", [True, False]) -def test_encode_without_insert_marker_but_argument_tags_and_windowing( - documents, add_argument_indices_to_input -): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_argument_indices_to_input=add_argument_indices_to_input, - add_argument_tags_to_input=True, - max_window=8, - insert_markers=False, - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents, encode_target=True) - assert len(encodings) == 3 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - if add_argument_indices_to_input: - arg_spans = [ - get_arg_token_span( - current_tokens, - current_start_indices, - current_end_indices, - taskmodule.argument_role2idx, - ) - for current_tokens, current_start_indices, current_end_indices in zip( - tokens, - inputs["pooler_start_indices"].tolist(), 
- inputs["pooler_end_indices"].tolist(), - ) - ] - - assert arg_spans == [ - {"head": ["I"], "tail": ["H"]}, - {"head": ["it"], "tail": ["O"]}, - {"head": ["O"], "tail": ["it"]}, - ] - - idx2role = {v: k for k, v in taskmodule.argument_role2idx.items()} - argument_tag_ids = [ - [get_bio_tag(tag_id, idx2role) for tag_id in (argument_tags - 1).tolist() if tag_id >= 0] - for argument_tags in inputs["argument_tags"] - ] - tokens_with_tags = [ - [(tok, tag) for tok, tag in zip(tkns, tags)] - for tkns, tags in zip(tokens, argument_tag_ids) - ] - assert tokens_with_tags == [ - [ - ("[CLS]", "O"), - ("at", "O"), - ("H", "B-tail"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "B-head"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - (".", "O"), - ("And", "O"), - ("it", "B-head"), - ("founded", "O"), - ("O", "B-tail"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - (".", "O"), - ("And", "O"), - ("it", "B-tail"), - ("founded", "O"), - ("O", "B-head"), - (".", "O"), - ("[SEP]", "O"), - ], - ] - - -@pytest.mark.parametrize("insert_markers", [True, False]) -def test_encode_with_add_entity_tags_to_input(documents, insert_markers): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_entity_tags_to_input=True, - insert_markers=insert_markers, - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents) - assert len(encodings) == 7 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - idx2label = {k: v for k, v in enumerate(taskmodule.entity_labels)} - entity_tag_ids = [ - [get_bio_tag(tag_id, idx2label) for tag_id in (argument_tags - 1).tolist() if tag_id >= 0] - for argument_tags in inputs["entity_tags"] - ] - tokens_with_tags = [ 
- [(tok, tag) for tok, tag in zip(tkns, tags)] for tkns, tags in zip(tokens, entity_tag_ids) - ] - if insert_markers: - assert tokens_with_tags[:3] == [ - [ - ("[CLS]", "O"), - ("[H]", "O"), - ("En", "B-PER"), - ("##ti", "I-PER"), - ("##ty", "I-PER"), - ("A", "I-PER"), - ("[/H]", "O"), - ("works", "O"), - ("at", "O"), - ("[T]", "O"), - ("B", "B-ORG"), - ("[/T]", "O"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("[H]", "O"), - ("En", "B-PER"), - ("##ti", "I-PER"), - ("##ty", "I-PER"), - ("G", "I-PER"), - ("[/H]", "O"), - ("works", "O"), - ("at", "O"), - ("[T]", "O"), - ("H", "B-ORG"), - ("[/T]", "O"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "B-ORG"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("[H]", "O"), - ("En", "B-PER"), - ("##ti", "I-PER"), - ("##ty", "I-PER"), - ("G", "I-PER"), - ("[/H]", "O"), - ("works", "O"), - ("at", "O"), - ("H", "B-ORG"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("[T]", "O"), - ("I", "B-ORG"), - ("[/T]", "O"), - (".", "O"), - ("[SEP]", "O"), - ], - ] - else: - assert tokens_with_tags[:3] == [ - [ - ("[CLS]", "O"), - ("En", "B-PER"), - ("##ti", "I-PER"), - ("##ty", "I-PER"), - ("A", "I-PER"), - ("works", "O"), - ("at", "O"), - ("B", "B-ORG"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "B-PER"), - ("##ti", "I-PER"), - ("##ty", "I-PER"), - ("G", "I-PER"), - ("works", "O"), - ("at", "O"), - ("H", "B-ORG"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "B-ORG"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - ("First", "O"), - ("sentence", "O"), - (".", "O"), - ("En", "B-PER"), - ("##ti", "I-PER"), - ("##ty", "I-PER"), - ("G", "I-PER"), - ("works", "O"), - ("at", "O"), - ("H", "B-ORG"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("I", "B-ORG"), - (".", "O"), - 
("[SEP]", "O"), - ], - ] - - -@pytest.mark.parametrize("insert_markers", [True, False]) -def test_encode_with_add_entity_tags_to_input_windowing(documents, insert_markers): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path=tokenizer_name_or_path, - add_entity_tags_to_input=True, - insert_markers=insert_markers, - max_window=12 if insert_markers else 8, - ) - assert not taskmodule.is_from_pretrained - taskmodule.prepare(documents) - - encodings = taskmodule.encode(documents, encode_target=True) - assert len(encodings) == 3 - batch = taskmodule.collate(encodings) - inputs, targets = batch - tokens = [ - taskmodule.tokenizer.convert_ids_to_tokens(input_ids) for input_ids in inputs["input_ids"] - ] - - idx2label = {k: v for k, v in enumerate(taskmodule.entity_labels)} - entity_tag_ids = [ - [get_bio_tag(tag_id, idx2label) for tag_id in (argument_tags - 1).tolist() if tag_id >= 0] - for argument_tags in inputs["entity_tags"] - ] - tokens_with_tags = [ - [(tok, tag) for tok, tag in zip(tkns, tags)] for tkns, tags in zip(tokens, entity_tag_ids) - ] - - if insert_markers: - assert tokens_with_tags == [ - [ - ("[CLS]", "O"), - ("at", "O"), - ("[T]", "O"), - ("H", "B-ORG"), - ("[/T]", "O"), - (".", "O"), - ("And", "O"), - ("founded", "O"), - ("[H]", "O"), - ("I", "B-ORG"), - ("[/H]", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - (".", "O"), - ("And", "O"), - ("[H]", "O"), - ("it", "B-PER"), - ("[/H]", "O"), - ("founded", "O"), - ("[T]", "O"), - ("O", "B-ORG"), - ("[/T]", "O"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - (".", "O"), - ("And", "O"), - ("[T]", "O"), - ("it", "B-PER"), - ("[/T]", "O"), - ("founded", "O"), - ("[H]", "O"), - ("O", "B-ORG"), - ("[/H]", "O"), - (".", "O"), - ("[SEP]", "O"), - ], - ] - else: - assert tokens_with_tags == [ - [ - ("[CLS]", "O"), - ("at", "O"), - ("H", "B-ORG"), - (".", "O"), - ("And", "O"), - 
("founded", "O"), - ("I", "B-ORG"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - (".", "O"), - ("And", "O"), - ("it", "B-PER"), - ("founded", "O"), - ("O", "B-ORG"), - (".", "O"), - ("[SEP]", "O"), - ], - [ - ("[CLS]", "O"), - (".", "O"), - ("And", "O"), - ("it", "B-PER"), - ("founded", "O"), - ("O", "B-ORG"), - (".", "O"), - ("[SEP]", "O"), - ], - ] - - -@pytest.mark.parametrize("add_candidate_relations", [False, True]) -def test_create_annotations_from_output(add_candidate_relations): - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - # pass in the labels and entity_labels to avoid calling prepare - # (which would required documents to collect the labels from) - labels=["org:founded_by", "per:employee_of", "per:founder"], - entity_labels=["PER", "ORG"], - # we want to test the effect of creating candidate relations - add_candidate_relations=add_candidate_relations, - ) - # just call post_prepare to set up the taskmodule since labels - # and entity_labels are already set - taskmodule.post_prepare() - - entities = [ - LabeledSpan(start=16, end=24, label="PER"), - LabeledSpan(start=34, end=35, label="ORG"), - LabeledSpan(start=49, end=50, label="ORG"), - ] - - assert taskmodule.none_label == "no_relation" - candidate_relations = [ - BinaryRelation(head=entities[0], tail=entities[1], label="no_relation"), - BinaryRelation(head=entities[0], tail=entities[2], label="no_relation"), - BinaryRelation(head=entities[2], tail=entities[1], label="no_relation"), - ] - - # Just create the task encodings with dummy inputs and a dummy document since - # we do not want to pass them into the model, but add correct metadata - # (which is used to create the annotations). 
- task_encodings = [ - TaskEncoding(inputs={}, metadata={"candidate_annotation": rel}, document=Document()) - for rel in candidate_relations - ] - unbatched_model_outputs = [ - {"labels": ["per:employee_of"], "probabilities": [0.6000000238418579]}, - {"labels": ["per:founder"], "probabilities": [0.5]}, - {"labels": ["no_relation"], "probabilities": [0.6000000238418579]}, - ] - - result_flat = [] - for i in range(len(unbatched_model_outputs)): - result_flat.extend( - list( - taskmodule.create_annotations_from_output( - task_encoding=task_encodings[i], task_output=unbatched_model_outputs[i] - ) - ) - ) - - # The entities need to be added to a document. This is only required to resolve - # the relations later on for better readability! - document = TestDocument(text="First sentence. Entity G works at H. And founded I.") - document.entities.extend(entities) - - # this would be the "model input" - assert [rel.resolve() for rel in candidate_relations] == [ - ("no_relation", (("PER", "Entity G"), ("ORG", "H"))), - ("no_relation", (("PER", "Entity G"), ("ORG", "I"))), - ("no_relation", (("ORG", "I"), ("ORG", "H"))), - ] - - # this is the final "output" - relations_resolved_with_score = [ - (rel.resolve(), round(rel.score, 4)) for _, rel in result_flat - ] - if add_candidate_relations: - # if candidate relations were added, the no-relation is removed - assert relations_resolved_with_score == [ - (("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), 0.6), - (("per:founder", (("PER", "Entity G"), ("ORG", "I"))), 0.5), - ] - else: - # if no candidate relations were added, the no-relation is kept - assert relations_resolved_with_score == [ - (("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), 0.6), - (("per:founder", (("PER", "Entity G"), ("ORG", "I"))), 0.5), - (("no_relation", (("ORG", "I"), ("ORG", "H"))), 0.6), - ] - - -@pytest.mark.parametrize("as_list", [False, True]) -@pytest.mark.parametrize("add_candidate_relations", [False, True]) -def 
test_create_annotations_from_output_with_argument_and_relation_type_whitelist( - add_candidate_relations, as_list -): - if as_list: - argument_and_relation_type_whitelist = [ - ["per:employee_of", "PER", "ORG"], - ["per:founder", "PER", "ORG"], - ["org:founded_by", "ORG", "PER"], - ["no_relation", "PER", "ORG"], - ["no_relation", "ORG", "PER"], - ] - else: - argument_and_relation_type_whitelist = { - "per:employee_of": [["PER", "ORG"]], - "per:founder": [["PER", "ORG"]], - "org:founded_by": [["ORG", "PER"]], - "no_relation": [["PER", "ORG"], ["ORG", "PER"]], - } - taskmodule = RETextClassificationWithIndicesTaskModule( - relation_annotation="relations", - tokenizer_name_or_path="bert-base-cased", - # pass in the labels and entity_labels to avoid calling prepare - # (which would required documents to collect the labels from) - labels=["org:founded_by", "per:employee_of", "per:founder"], - entity_labels=["PER", "ORG"], - # we want to test the effect of creating candidate relations - add_candidate_relations=add_candidate_relations, - argument_and_relation_type_whitelist=argument_and_relation_type_whitelist, - ) - # just call post_prepare to set up the taskmodule since labels - # and entity_labels are already set - taskmodule.post_prepare() - - entities = [ - LabeledSpan(start=16, end=24, label="PER"), - LabeledSpan(start=34, end=35, label="ORG"), - LabeledSpan(start=49, end=50, label="ORG"), - ] - - assert taskmodule.none_label == "no_relation" - candidate_relations = [ - BinaryRelation(head=entities[0], tail=entities[1], label="no_relation"), - BinaryRelation(head=entities[0], tail=entities[2], label="no_relation"), - BinaryRelation(head=entities[2], tail=entities[0], label="no_relation"), - BinaryRelation(head=entities[2], tail=entities[1], label="no_relation"), - BinaryRelation(head=entities[1], tail=entities[2], label="no_relation"), - ] - - # Just create the task encodings with dummy inputs and a dummy document since - # we do not want to pass them into the 
model, but add correct metadata - # (which is used to create the annotations). - task_encodings = [ - TaskEncoding(inputs={}, metadata={"candidate_annotation": rel}, document=Document()) - for rel in candidate_relations - ] - unbatched_model_outputs = [ - {"labels": ["per:employee_of"], "probabilities": [0.6000000238418579]}, - {"labels": ["per:founder"], "probabilities": [0.5]}, - {"labels": ["no_relation"], "probabilities": [0.6000000238418579]}, - {"labels": ["org:founded_by"], "probabilities": [0.6000000238418579]}, - {"labels": ["no_relation"], "probabilities": [0.6000000238418579]}, - ] - - result_flat = [] - for i in range(len(unbatched_model_outputs)): - result_flat.extend( - list( - taskmodule.create_annotations_from_output( - task_encoding=task_encodings[i], task_output=unbatched_model_outputs[i] - ) - ) - ) - - # The entities need to be added to a document. This is only required to resolve - # the relations later on for better readability! - document = TestDocument(text="First sentence. Entity G works at H. And founded I.") - document.entities.extend(entities) - - # this would be the "model input" - assert [rel.resolve() for rel in candidate_relations] == [ - ("no_relation", (("PER", "Entity G"), ("ORG", "H"))), - ("no_relation", (("PER", "Entity G"), ("ORG", "I"))), - ("no_relation", (("ORG", "I"), ("PER", "Entity G"))), - ("no_relation", (("ORG", "I"), ("ORG", "H"))), - ("no_relation", (("ORG", "H"), ("ORG", "I"))), - ] - - # this is the final "output" - relations_resolved_with_score = [ - (rel.resolve(), round(rel.score, 4)) for _, rel in result_flat - ] - if add_candidate_relations: - # if candidate relations were added, no-relations are removed - # relations with wrong entity types are also removed. 
- assert relations_resolved_with_score == [ - (("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), 0.6), - (("per:founder", (("PER", "Entity G"), ("ORG", "I"))), 0.5), - ] - else: - # if no candidate relations were added, only relations not fitting the filter - # are removed. We explicitly need to add "no_relation" with possible argument types - # to whitelist if we don't want them to be filtered. - assert relations_resolved_with_score == [ - (("per:employee_of", (("PER", "Entity G"), ("ORG", "H"))), 0.6), - (("per:founder", (("PER", "Entity G"), ("ORG", "I"))), 0.5), - (("no_relation", (("ORG", "I"), ("PER", "Entity G"))), 0.6), - ] diff --git a/tests/taskmodules/test_text2text.py b/tests/taskmodules/test_text2text.py deleted file mode 100644 index 7dc779acb..000000000 --- a/tests/taskmodules/test_text2text.py +++ /dev/null @@ -1,275 +0,0 @@ -import pickle -from typing import Any, Dict, List, Sequence, Tuple - -import pytest -import torch -from pie_core import Annotation, TaskEncoding - -from pie_modules.annotations import AbstractiveSummary -from pie_modules.documents import ( - TextDocumentWithAbstractiveSummary, - TokenDocumentWithAbstractiveSummary, -) -from pie_modules.models.common import VALIDATION -from pie_modules.taskmodules import TextToTextTaskModule -from pie_modules.taskmodules.text_to_text import ( - InputEncodingType, - TargetEncodingType, - TaskEncodingType, - TaskOutputType, -) - - -@pytest.fixture(scope="module") -def documents(): - result = [] - - doc = TextDocumentWithAbstractiveSummary(text="This is a test document") - summary = AbstractiveSummary(text="a document") - doc.abstractive_summary.append(summary) - result.append(doc) - - doc = TextDocumentWithAbstractiveSummary( - text="This is another test document which is a bit longer" - ) - summary = AbstractiveSummary(text="a longer document") - doc.abstractive_summary.append(summary) - result.append(doc) - - return result - - -@pytest.fixture(scope="module") -def taskmodule(): - return 
TextToTextTaskModule( - tokenizer_name_or_path="google/t5-efficient-tiny-nl2", - document_type="pie_modules.documents.TextDocumentWithAbstractiveSummary", - target_layer="abstractive_summary", - target_annotation_type="pie_modules.annotations.AbstractiveSummary", - tokenized_document_type="pie_modules.documents.TokenDocumentWithAbstractiveSummary", - text_metric_type="torchmetrics.text.ROUGEScore", - ) - - -def test_taskmodule(taskmodule): - assert taskmodule is not None - assert taskmodule.document_type == TextDocumentWithAbstractiveSummary - assert taskmodule.tokenized_document_type == TokenDocumentWithAbstractiveSummary - assert taskmodule.target_annotation_type == AbstractiveSummary - assert taskmodule.layer_names == ["abstractive_summary"] - assert taskmodule.generation_config == {} - - -@pytest.fixture(scope="module") -def task_encodings(taskmodule, documents) -> Sequence[TaskEncodingType]: - encodings = taskmodule.encode(documents, encode_target=True) - assert all(isinstance(encoding, TaskEncoding) for encoding in encodings) - assert len(encodings) == 2 == len(documents) - assert encodings[0].document == documents[0] - assert encodings[1].document == documents[1] - return encodings - - -def test_maybe_log_example(taskmodule, task_encodings, caplog): - counter_backup = taskmodule.log_first_n_examples - - taskmodule.log_first_n_examples = 1 - with caplog.at_level("INFO"): - taskmodule.maybe_log_example(task_encodings[0]) - - assert len(caplog.messages) == 3 - assert caplog.messages[0] == "input_ids: [100, 19, 3, 9, 794, 1708, 1]" - assert caplog.messages[1] == "attention_mask: [1, 1, 1, 1, 1, 1, 1]" - assert caplog.messages[2] == "labels: [3, 9, 1708, 1]" - - taskmodule.log_first_n_examples = counter_backup - - -@pytest.fixture(scope="module") -def input_encoding(taskmodule, task_encodings) -> InputEncodingType: - assert len(task_encodings) > 0 - return task_encodings[0].inputs - - -def test_input_encoding(taskmodule, input_encoding): - assert 
isinstance(input_encoding, InputEncodingType) - assert input_encoding.input_ids == [100, 19, 3, 9, 794, 1708, 1] - assert input_encoding.attention_mask == [1, 1, 1, 1, 1, 1, 1] - - tokens = taskmodule.tokenizer.convert_ids_to_tokens(input_encoding.input_ids) - assert tokens == ["▁This", "▁is", "▁", "a", "▁test", "▁document", ""] - - -@pytest.fixture(scope="module") -def metadata(taskmodule, task_encodings) -> Dict[str, Any]: - assert len(task_encodings) > 0 - return task_encodings[0].metadata - - -def test_metadata(taskmodule, metadata): - assert set(metadata) == {"tokenized_document", "guidance_annotation"} - - tokenized_document = metadata["tokenized_document"] - assert isinstance(tokenized_document, TokenDocumentWithAbstractiveSummary) - assert tokenized_document.tokens == ("▁This", "▁is", "▁", "a", "▁test", "▁document", "") - assert len(tokenized_document.abstractive_summary) == 1 - assert tokenized_document.abstractive_summary[0].text == "a document" - - -@pytest.fixture(scope="module") -def target_encoding(taskmodule, task_encodings) -> TargetEncodingType: - assert len(task_encodings) > 0 - return task_encodings[0].targets - - -def test_target_encoding(taskmodule, target_encoding): - assert isinstance(target_encoding, TargetEncodingType) - assert target_encoding.labels == [3, 9, 1708, 1] - assert target_encoding.decoder_attention_mask == [1, 1, 1, 1] - - -@pytest.fixture(scope="module") -def batch(taskmodule, task_encodings) -> List[TaskEncodingType]: - result = taskmodule.collate(task_encodings) - return result - - -def test_batch(taskmodule, batch): - assert len(batch) == 2 - inputs, targets = batch - - assert set(inputs) == {"input_ids", "attention_mask"} - torch.testing.assert_close( - inputs["input_ids"], - torch.tensor( - [ - [100, 19, 3, 9, 794, 1708, 1, 0, 0, 0, 0, 0], - [100, 19, 430, 794, 1708, 84, 19, 3, 9, 720, 1200, 1], - ] - ), - ) - torch.testing.assert_close( - inputs["attention_mask"], - torch.tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], [1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), - ) - - assert set(targets) == {"labels", "decoder_attention_mask"} - torch.testing.assert_close( - targets["labels"], torch.tensor([[3, 9, 1708, 1, 0], [3, 9, 1200, 1708, 1]]) - ) - torch.testing.assert_close( - targets["decoder_attention_mask"], torch.tensor([[1, 1, 1, 1, 0], [1, 1, 1, 1, 1]]) - ) - - -@pytest.fixture(scope="module") -def unbatched_output(taskmodule, batch) -> Sequence[TaskOutputType]: - inputs, targets = batch - return taskmodule.unbatch_output(targets) - - -def test_unbatched_output(taskmodule, unbatched_output): - assert all(isinstance(output, TargetEncodingType) for output in unbatched_output) - assert len(unbatched_output) == 2 - - assert unbatched_output[0].labels == [3, 9, 1708, 1] - assert unbatched_output[0].decoder_attention_mask is None - - assert unbatched_output[1].labels == [3, 9, 1200, 1708, 1] - assert unbatched_output[1].decoder_attention_mask is None - - -@pytest.fixture(scope="module") -def decoded_annotations( - taskmodule, task_encodings, unbatched_output -) -> List[Tuple[str, Annotation]]: - result = [] - for encoding, output in zip(task_encodings, unbatched_output): - result.extend( - taskmodule.create_annotations_from_output(task_encoding=encoding, task_output=output) - ) - return result - - -def test_decoded_annotations(taskmodule, decoded_annotations): - names, annotations = zip(*decoded_annotations) - assert all(layer_name == taskmodule.target_layer for layer_name in names) - assert all( - isinstance(annotation, taskmodule.target_annotation_type) for annotation in annotations - ) - - assert len(annotations) == 2 - assert annotations[0].text == "a document" - assert annotations[0].score is None - assert annotations[1].text == "a longer document" - assert annotations[1].score is None - - -def test_configure_model_metrics(taskmodule): - metric = taskmodule.configure_model_metric(stage=VALIDATION) - assert metric is not None - values = metric.compute() - keys = { - "rouge2_fmeasure", - 
"rougeL_recall", - "rouge1_precision", - "rouge1_recall", - "rouge2_recall", - "rougeL_precision", - "rouge1_fmeasure", - "rougeLsum_recall", - "rougeLsum_precision", - "rougeL_fmeasure", - "rouge2_precision", - "rougeLsum_fmeasure", - } - assert set(values) == keys - assert all(torch.isnan(value) for value in values.values()) - - labels = torch.tensor([[3, 9, 1708, 1, 0], [3, 9, 1200, 1708, 1]]) - metric.update(prediction={"labels": labels}, target={"labels": labels}) - assert set(metric.metric_state) == keys - assert all( - value == [torch.tensor(1.0), torch.tensor(1.0)] for value in metric.metric_state.values() - ) - values = metric.compute() - assert set(values) == keys - assert all(value == torch.tensor(1.0) for value in values.values()) - - random_labels = torch.tensor([[875, 885, 112, 289, 769], [270, 583, 970, 114, 71]]) - metric.update(prediction={"labels": random_labels}, target={"labels": labels}) - values = metric.compute() - assert {k: v.item() for k, v in values.items()} == { - "rouge1_fmeasure": 0.5625, - "rouge1_precision": 0.550000011920929, - "rouge1_recall": 0.5833333134651184, - "rouge2_fmeasure": 0.5, - "rouge2_precision": 0.5, - "rouge2_recall": 0.5, - "rougeL_fmeasure": 0.5625, - "rougeL_precision": 0.550000011920929, - "rougeL_recall": 0.5833333134651184, - "rougeLsum_fmeasure": 0.5625, - "rougeLsum_precision": 0.550000011920929, - "rougeLsum_recall": 0.5833333134651184, - } - - # ensure that the metric can be pickled - pickle.dumps(metric) - - -def test_configure_model_generation(taskmodule): - generation_config = taskmodule.configure_model_generation() - assert generation_config is not None - assert generation_config == {} - - -def test_warn_once(taskmodule, caplog): - with caplog.at_level("WARNING"): - taskmodule.warn_only_once("test") - taskmodule.warn_only_once("test") - taskmodule.warn_only_once("test2") - - assert len(caplog.messages) == 2 - assert caplog.messages[0] == "test (This warning will only be shown once)" - assert 
caplog.messages[1] == "test2 (This warning will only be shown once)" diff --git a/tests/taskmodules/test_text2text_with_guidance.py b/tests/taskmodules/test_text2text_with_guidance.py deleted file mode 100644 index c66711610..000000000 --- a/tests/taskmodules/test_text2text_with_guidance.py +++ /dev/null @@ -1,240 +0,0 @@ -from typing import Any, Dict, List, Sequence, Tuple - -import pytest -import torch -from pie_core import Annotation, TaskEncoding - -from pie_modules.annotations import GenerativeAnswer, Question -from pie_modules.documents import ( - TextDocumentWithQuestionsAndGenerativeAnswers, - TokenDocumentWithQuestionsAndGenerativeAnswers, -) -from pie_modules.taskmodules import TextToTextTaskModule -from pie_modules.taskmodules.text_to_text import ( - InputEncodingType, - TargetEncodingType, - TaskEncodingType, - TaskOutputType, -) - - -@pytest.fixture(scope="module") -def documents(): - result = [] - - doc = TextDocumentWithQuestionsAndGenerativeAnswers(text="This is a test document") - question = Question(text="What is this?") - doc.questions.append(question) - answer = GenerativeAnswer(text="a document", question=question) - doc.generative_answers.append(answer) - result.append(doc) - - doc = TextDocumentWithQuestionsAndGenerativeAnswers( - text="This is another test document which is a bit longer." 
- ) - question = Question(text="And what is this?") - doc.questions.append(question) - answer = GenerativeAnswer(text="a longer document", question=question) - doc.generative_answers.append(answer) - result.append(doc) - - return result - - -@pytest.fixture(scope="module") -def taskmodule(): - return TextToTextTaskModule( - tokenizer_name_or_path="google/t5-efficient-tiny-nl2", - document_type="pie_modules.documents.TextDocumentWithQuestionsAndGenerativeAnswers", - target_layer="generative_answers", - target_annotation_type="pie_modules.annotations.GenerativeAnswer", - tokenized_document_type="pie_modules.documents.TokenDocumentWithQuestionsAndGenerativeAnswers", - guidance_layer="questions", - guidance_annotation_field="question", - text_metric_type="torchmetrics.text.ROUGEScore", - ) - - -def test_taskmodule(taskmodule): - assert taskmodule is not None - assert taskmodule.document_type == TextDocumentWithQuestionsAndGenerativeAnswers - assert taskmodule.tokenized_document_type == TokenDocumentWithQuestionsAndGenerativeAnswers - assert taskmodule.target_annotation_type == GenerativeAnswer - assert taskmodule.layer_names == ["generative_answers"] - assert taskmodule.generation_config == {} - - -@pytest.fixture(scope="module") -def task_encodings(taskmodule, documents) -> Sequence[TaskEncodingType]: - encodings = taskmodule.encode(documents, encode_target=True) - assert all(isinstance(encoding, TaskEncoding) for encoding in encodings) - assert len(encodings) == 2 == len(documents) - assert encodings[0].document == documents[0] - assert encodings[1].document == documents[1] - return encodings - - -def test_maybe_log_example(taskmodule, task_encodings, caplog): - counter_backup = taskmodule.log_first_n_examples - - taskmodule.log_first_n_examples = 1 - with caplog.at_level("INFO"): - taskmodule.maybe_log_example(task_encodings[0]) - - assert len(caplog.messages) == 3 - assert caplog.messages[0] == "input_ids: [363, 19, 48, 58, 1, 100, 19, 3, 9, 794, 1708, 1]" - assert 
caplog.messages[1] == "attention_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]" - assert caplog.messages[2] == "labels: [3, 9, 1708, 1]" - - taskmodule.log_first_n_examples = counter_backup - - -@pytest.fixture(scope="module") -def input_encoding(taskmodule, task_encodings) -> InputEncodingType: - assert len(task_encodings) > 0 - return task_encodings[0].inputs - - -def test_input_encoding(taskmodule, input_encoding): - assert isinstance(input_encoding, InputEncodingType) - assert input_encoding.input_ids == [363, 19, 48, 58, 1, 100, 19, 3, 9, 794, 1708, 1] - assert input_encoding.attention_mask == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - - tokens = taskmodule.tokenizer.convert_ids_to_tokens(input_encoding.input_ids) - assert tokens == [ - "▁What", - "▁is", - "▁this", - "?", - "", - "▁This", - "▁is", - "▁", - "a", - "▁test", - "▁document", - "", - ] - - -@pytest.fixture(scope="module") -def metadata(taskmodule, task_encodings) -> Dict[str, Any]: - assert len(task_encodings) > 0 - return task_encodings[0].metadata - - -def test_metadata(taskmodule, metadata): - assert set(metadata) == {"tokenized_document", "guidance_annotation"} - - tokenized_document = metadata["tokenized_document"] - assert isinstance(tokenized_document, TokenDocumentWithQuestionsAndGenerativeAnswers) - assert tokenized_document.tokens == ( - "▁What", - "▁is", - "▁this", - "?", - "", - "▁This", - "▁is", - "▁", - "a", - "▁test", - "▁document", - "", - ) - assert len(tokenized_document.questions) == 1 - assert tokenized_document.questions[0].text == "What is this?" 
- - -@pytest.fixture(scope="module") -def target_encoding(taskmodule, task_encodings) -> TargetEncodingType: - assert len(task_encodings) > 0 - return task_encodings[0].targets - - -def test_target_encoding(taskmodule, target_encoding): - assert isinstance(target_encoding, TargetEncodingType) - assert target_encoding.labels == [3, 9, 1708, 1] - assert target_encoding.decoder_attention_mask == [1, 1, 1, 1] - - -@pytest.fixture(scope="module") -def batch(taskmodule, task_encodings) -> List[TaskEncodingType]: - result = taskmodule.collate(task_encodings) - return result - - -def test_batch(taskmodule, batch): - assert len(batch) == 2 - inputs, targets = batch - - assert set(inputs) == {"input_ids", "attention_mask"} - torch.testing.assert_close( - inputs["input_ids"], - torch.tensor( - [ - [363, 19, 48, 58, 1, 100, 19, 3, 9, 794, 1708, 1, 0, 0, 0, 0, 0, 0, 0], - [275, 125, 19, 48, 58, 1, 100, 19, 430, 794, 1708, 84, 19, 3, 9, 720, 1200, 5, 1], - ] - ), - ) - torch.testing.assert_close( - inputs["attention_mask"], - torch.tensor( - [ - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - ] - ), - ) - - assert set(targets) == {"labels", "decoder_attention_mask"} - torch.testing.assert_close( - targets["labels"], torch.tensor([[3, 9, 1708, 1, 0], [3, 9, 1200, 1708, 1]]) - ) - torch.testing.assert_close( - targets["decoder_attention_mask"], torch.tensor([[1, 1, 1, 1, 0], [1, 1, 1, 1, 1]]) - ) - - -@pytest.fixture(scope="module") -def unbatched_output(taskmodule, batch) -> Sequence[TaskOutputType]: - inputs, targets = batch - return taskmodule.unbatch_output(targets) - - -def test_unbatched_output(taskmodule, unbatched_output): - assert all(isinstance(output, TargetEncodingType) for output in unbatched_output) - assert len(unbatched_output) == 2 - - assert unbatched_output[0].labels == [3, 9, 1708, 1] - assert unbatched_output[0].decoder_attention_mask is None - - assert unbatched_output[1].labels == [3, 
9, 1200, 1708, 1] - assert unbatched_output[1].decoder_attention_mask is None - - -@pytest.fixture(scope="module") -def decoded_annotations( - taskmodule, task_encodings, unbatched_output -) -> List[Tuple[str, Annotation]]: - result = [] - for encoding, output in zip(task_encodings, unbatched_output): - result.extend( - taskmodule.create_annotations_from_output(task_encoding=encoding, task_output=output) - ) - return result - - -def test_decoded_annotations(taskmodule, decoded_annotations): - names, annotations = zip(*decoded_annotations) - assert all(layer_name == taskmodule.target_layer for layer_name in names) - assert all( - isinstance(annotation, taskmodule.target_annotation_type) for annotation in annotations - ) - - assert len(annotations) == 2 - assert annotations[0].text == "a document" - assert annotations[0].score is None - assert annotations[1].text == "a longer document" - assert annotations[1].score is None From 9a187518faf58fc852dfeb56865eb1a4fc76e5a7 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sun, 27 Apr 2025 14:13:36 +0200 Subject: [PATCH 02/11] re-add pytorch source --- poetry.lock | 1982 ++++++++++++++++++++++-------------------------- pyproject.toml | 5 + 2 files changed, 909 insertions(+), 1078 deletions(-) diff --git a/poetry.lock b/poetry.lock index 911cda7bf..77e95ccc0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,48 +1,35 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. - -[[package]] -name = "absl-py" -version = "1.4.0" -description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." 
-optional = false -python-versions = ">=3.6" -groups = ["main"] -files = [ - {file = "absl-py-1.4.0.tar.gz", hash = "sha256:d2c244d01048ba476e7c080bd2c6df5e141d211de80223460d5b3b8a2a58433d"}, - {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, -] +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "accelerate" -version = "0.32.1" +version = "1.6.0" description = "Accelerate" optional = false -python-versions = ">=3.8.0" -groups = ["dev"] +python-versions = ">=3.9.0" files = [ - {file = "accelerate-0.32.1-py3-none-any.whl", hash = "sha256:71fcf4be00872194071de561634268b71417d7f5b16b178e2fa76b6f117c52b0"}, - {file = "accelerate-0.32.1.tar.gz", hash = "sha256:3999acff0237cd0d4f9fd98b42d5a3163544777b53fc4f1eec886b77e992d177"}, + {file = "accelerate-1.6.0-py3-none-any.whl", hash = "sha256:1aee717d3d3735ad6d09710a7c26990ee4652b79b4e93df46551551b5227c2aa"}, + {file = "accelerate-1.6.0.tar.gz", hash = "sha256:28c1ef1846e690944f98b68dc7b8bb6c51d032d45e85dcbb3adb0c8b99dffb32"}, ] [package.dependencies] -huggingface-hub = "*" -numpy = ">=1.17,<2.0.0" +huggingface-hub = ">=0.21.0" +numpy = ">=1.17,<3.0.0" packaging = ">=20.0" psutil = "*" pyyaml = "*" -safetensors = ">=0.3.1" -torch = ">=1.10.0" +safetensors = ">=0.4.3" +torch = ">=2.0.0" [package.extras] -deepspeed = ["deepspeed (<=0.14.0)"] -dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "diffusers", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.2.1,<0.3.0)", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] -quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.2.1,<0.3.0)"] +deepspeed = ["deepspeed"] +dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "diffusers", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest 
(>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.11.2,<0.12.0)", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.11.2,<0.12.0)"] rich = ["rich"] sagemaker = ["sagemaker"] -test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] -test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist"] -test-trackers = ["comet-ml", "dvclive", "tensorboard", "wandb"] -testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist"] +test-trackers = ["comet-ml", "dvclive", "matplotlib", "mlflow", "tensorboard", "wandb"] +testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] [[package]] name = "aiohttp" @@ -50,7 +37,6 @@ version = "3.9.5" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, @@ 
-139,7 +125,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.0,<2.0" [package.extras] -speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +speedups = ["Brotli", "aiodns", "brotlicffi"] [[package]] name = "aiosignal" @@ -147,7 +133,6 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" -groups = ["main"] files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -162,8 +147,6 @@ version = "4.0.3" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" -groups = ["main"] -markers = "python_version < \"3.11\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -175,7 +158,6 @@ version = "23.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" -groups = ["main"] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -186,23 +168,23 @@ cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] dev = ["attrs[tests]", "pre-commit"] docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-mypy = ["mypy (>=1.6) ; platform_python_implementation == \"CPython\" 
and python_version >= \"3.8\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\""] -tests-no-zope = ["attrs[tests-mypy]", "cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] [[package]] name = "beautifulsoup4" -version = "4.12.3" +version = "4.13.4" description = "Screen-scraping library" optional = false -python-versions = ">=3.6.0" -groups = ["dev"] +python-versions = ">=3.7.0" files = [ - {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, - {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, + {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, + {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, ] [package.dependencies] soupsieve = ">1.2" +typing-extensions = ">=4.0.0" [package.extras] cchardet = ["cchardet"] @@ -213,34 +195,32 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.34.146" +version = "1.38.3" description = "The AWS SDK for Python" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "boto3-1.34.146-py3-none-any.whl", hash = "sha256:7ec568fb19bce82a70be51f08fddac1ef927ca3fb0896cbb34303a012ba228d8"}, - {file = "boto3-1.34.146.tar.gz", hash = "sha256:5686fe2a6d1aa1de8a88e9589cdcc33361640d3d7a13da718a30717248886124"}, + {file = "boto3-1.38.3-py3-none-any.whl", hash = "sha256:9218f86e2164e1bddb75d435bbde4fa651aa58687213d7e3e1b50f7eb8868f66"}, + {file = "boto3-1.38.3.tar.gz", hash = 
"sha256:655d51abcd68a40a33c52dbaa2ca73fc63c746b894e2ae22ed8ddc1912ddd93f"}, ] [package.dependencies] -botocore = ">=1.34.146,<1.35.0" +botocore = ">=1.38.3,<1.39.0" jmespath = ">=0.7.1,<2.0.0" -s3transfer = ">=0.10.0,<0.11.0" +s3transfer = ">=0.12.0,<0.13.0" [package.extras] crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.146" +version = "1.38.3" description = "Low-level, data-driven core of boto 3." optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "botocore-1.34.146-py3-none-any.whl", hash = "sha256:3fd4782362bd29c192704ebf859c5c8c5189ad05719e391eefe23088434427ae"}, - {file = "botocore-1.34.146.tar.gz", hash = "sha256:849cb8e54e042443aeabcd7822b5f2b76cb5cfe33fe3a71f91c7c069748a869c"}, + {file = "botocore-1.38.3-py3-none-any.whl", hash = "sha256:96f823240fe3704b99c17d1d1b2fd2d1679cf56d2a55b095f00255b76087cbf0"}, + {file = "botocore-1.38.3.tar.gz", hash = "sha256:790f8f966201781f5fcf486d48b4492e9f734446bbf9d19ef8159d08be854243"}, ] [package.dependencies] @@ -252,18 +232,17 @@ urllib3 = [ ] [package.extras] -crt = ["awscrt (==0.20.11)"] +crt = ["awscrt (==0.23.8)"] [[package]] name = "bpemb" -version = "0.3.5" +version = "0.3.6" description = "Byte-pair embeddings in 275 languages" optional = false python-versions = "*" -groups = ["dev"] files = [ - {file = "bpemb-0.3.5-py3-none-any.whl", hash = "sha256:c8b5ba9255d93341073a2a3ab335463660ffd44b9bb96b86156215ada22b241a"}, - {file = "bpemb-0.3.5.tar.gz", hash = "sha256:6d1254d3ccdb9617f064c9ae5c995a7d0d621cf5db7d3cdc1fe3cb2000b2601b"}, + {file = "bpemb-0.3.6-py3-none-any.whl", hash = "sha256:6eabc133bbd0a7dbeb52b2cfed55ca5cacbb38b236ebb1f504b279a2d835e8b7"}, + {file = "bpemb-0.3.6.tar.gz", hash = "sha256:a33fa1dcdfaf3d4cb3eaebac430b6f23a684a888e1761f5a026ce3868153ee2d"}, ] [package.dependencies] @@ -279,7 +258,6 @@ version = "2024.2.2" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" -groups = ["main", "dev"] files = [ {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, @@ -291,7 +269,6 @@ version = "3.4.0" description = "Validate configuration and produce human readable error messages." optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, @@ -303,7 +280,6 @@ version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" -groups = ["main", "dev"] files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, @@ -399,14 +375,13 @@ files = [ [[package]] name = "click" -version = "8.1.7" +version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, ] 
[package.dependencies] @@ -418,23 +393,20 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "conllu" -version = "5.0.1" +version = "6.0.0" description = "CoNLL-U Parser parses a CoNLL-U formatted string into a nested python dictionary" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ - {file = "conllu-5.0.1-py3-none-any.whl", hash = "sha256:19f46a8b61433e6ad7f06ffcafe72a18405821f8fda710f8579b95371a3cb3f9"}, - {file = "conllu-5.0.1.tar.gz", hash = "sha256:1ce417575853c58910068ab05323ccf98135255db2afa0ff154ff29628484b6b"}, + {file = "conllu-6.0.0-py3-none-any.whl", hash = "sha256:c47206a0912f768bfae429d3d3c2c7f5ed068babd2502663e865cfb21532cbcc"}, + {file = "conllu-6.0.0.tar.gz", hash = "sha256:bc6072d49d00e77f4454039519118c0500fafa0d0eb509f53793081084f50aba"}, ] [package.extras] @@ -442,67 +414,87 @@ test = ["tox"] [[package]] name = "contourpy" -version = "1.2.1" +version = "1.3.0" description = "Python library for calculating contours of 2D quadrilateral grids" optional = false python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "contourpy-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd7c23df857d488f418439686d3b10ae2fbf9bc256cd045b37a8c16575ea1040"}, - {file = "contourpy-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5b9eb0ca724a241683c9685a484da9d35c872fd42756574a7cfbf58af26677fd"}, - {file = 
"contourpy-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c75507d0a55378240f781599c30e7776674dbaf883a46d1c90f37e563453480"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11959f0ce4a6f7b76ec578576a0b61a28bdc0696194b6347ba3f1c53827178b9"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb3315a8a236ee19b6df481fc5f997436e8ade24a9f03dfdc6bd490fea20c6da"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39f3ecaf76cd98e802f094e0d4fbc6dc9c45a8d0c4d185f0f6c2234e14e5f75b"}, - {file = "contourpy-1.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:94b34f32646ca0414237168d68a9157cb3889f06b096612afdd296003fdd32fd"}, - {file = "contourpy-1.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:457499c79fa84593f22454bbd27670227874cd2ff5d6c84e60575c8b50a69619"}, - {file = "contourpy-1.2.1-cp310-cp310-win32.whl", hash = "sha256:ac58bdee53cbeba2ecad824fa8159493f0bf3b8ea4e93feb06c9a465d6c87da8"}, - {file = "contourpy-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:9cffe0f850e89d7c0012a1fb8730f75edd4320a0a731ed0c183904fe6ecfc3a9"}, - {file = "contourpy-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6022cecf8f44e36af10bd9118ca71f371078b4c168b6e0fab43d4a889985dbb5"}, - {file = "contourpy-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef5adb9a3b1d0c645ff694f9bca7702ec2c70f4d734f9922ea34de02294fdf72"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6150ffa5c767bc6332df27157d95442c379b7dce3a38dff89c0f39b63275696f"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c863140fafc615c14a4bf4efd0f4425c02230eb8ef02784c9a156461e62c965"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:00e5388f71c1a0610e6fe56b5c44ab7ba14165cdd6d695429c5cd94021e390b2"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4492d82b3bc7fbb7e3610747b159869468079fe149ec5c4d771fa1f614a14df"}, - {file = "contourpy-1.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:49e70d111fee47284d9dd867c9bb9a7058a3c617274900780c43e38d90fe1205"}, - {file = "contourpy-1.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b59c0ffceff8d4d3996a45f2bb6f4c207f94684a96bf3d9728dbb77428dd8cb8"}, - {file = "contourpy-1.2.1-cp311-cp311-win32.whl", hash = "sha256:7b4182299f251060996af5249c286bae9361fa8c6a9cda5efc29fe8bfd6062ec"}, - {file = "contourpy-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2855c8b0b55958265e8b5888d6a615ba02883b225f2227461aa9127c578a4922"}, - {file = "contourpy-1.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:62828cada4a2b850dbef89c81f5a33741898b305db244904de418cc957ff05dc"}, - {file = "contourpy-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:309be79c0a354afff9ff7da4aaed7c3257e77edf6c1b448a779329431ee79d7e"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e785e0f2ef0d567099b9ff92cbfb958d71c2d5b9259981cd9bee81bd194c9a4"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cac0a8f71a041aa587410424ad46dfa6a11f6149ceb219ce7dd48f6b02b87a7"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af3f4485884750dddd9c25cb7e3915d83c2db92488b38ccb77dd594eac84c4a0"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ce6889abac9a42afd07a562c2d6d4b2b7134f83f18571d859b25624a331c90b"}, - {file = "contourpy-1.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a1eea9aecf761c661d096d39ed9026574de8adb2ae1c5bd7b33558af884fb2ce"}, - {file = 
"contourpy-1.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:187fa1d4c6acc06adb0fae5544c59898ad781409e61a926ac7e84b8f276dcef4"}, - {file = "contourpy-1.2.1-cp312-cp312-win32.whl", hash = "sha256:c2528d60e398c7c4c799d56f907664673a807635b857df18f7ae64d3e6ce2d9f"}, - {file = "contourpy-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:1a07fc092a4088ee952ddae19a2b2a85757b923217b7eed584fdf25f53a6e7ce"}, - {file = "contourpy-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bb6834cbd983b19f06908b45bfc2dad6ac9479ae04abe923a275b5f48f1a186b"}, - {file = "contourpy-1.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1d59e739ab0e3520e62a26c60707cc3ab0365d2f8fecea74bfe4de72dc56388f"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd3db01f59fdcbce5b22afad19e390260d6d0222f35a1023d9adc5690a889364"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a12a813949e5066148712a0626895c26b2578874e4cc63160bb007e6df3436fe"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe0ccca550bb8e5abc22f530ec0466136379c01321fd94f30a22231e8a48d985"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1d59258c3c67c865435d8fbeb35f8c59b8bef3d6f46c1f29f6123556af28445"}, - {file = "contourpy-1.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f32c38afb74bd98ce26de7cc74a67b40afb7b05aae7b42924ea990d51e4dac02"}, - {file = "contourpy-1.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d31a63bc6e6d87f77d71e1abbd7387ab817a66733734883d1fc0021ed9bfa083"}, - {file = "contourpy-1.2.1-cp39-cp39-win32.whl", hash = "sha256:ddcb8581510311e13421b1f544403c16e901c4e8f09083c881fab2be80ee31ba"}, - {file = "contourpy-1.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:10a37ae557aabf2509c79715cd20b62e4c7c28b8cd62dd7d99e5ed3ce28c3fd9"}, - {file = 
"contourpy-1.2.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a31f94983fecbac95e58388210427d68cd30fe8a36927980fab9c20062645609"}, - {file = "contourpy-1.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef2b055471c0eb466033760a521efb9d8a32b99ab907fc8358481a1dd29e3bd3"}, - {file = "contourpy-1.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b33d2bc4f69caedcd0a275329eb2198f560b325605810895627be5d4b876bf7f"}, - {file = "contourpy-1.2.1.tar.gz", hash = "sha256:4d8908b3bee1c889e547867ca4cdc54e5ab6be6d3e078556814a22457f49423c"}, +files = [ + {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"}, + {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f8557cbb07415a4d6fa191f20fd9d2d9eb9c0b61d1b2f52a8926e43c6e9af7"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36f965570cff02b874773c49bfe85562b47030805d7d8360748f3eca570f4cab"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cacd81e2d4b6f89c9f8a5b69b86490152ff39afc58a95af002a398273e5ce589"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69375194457ad0fad3a839b9e29aa0b0ed53bb54db1bfb6c3ae43d111c31ce41"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a52040312b1a858b5e31ef28c2e865376a386c60c0e248370bbea2d3f3b760d"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3faeb2998e4fcb256542e8a926d08da08977f7f5e62cf733f3c211c2a5586223"}, + {file = "contourpy-1.3.0-cp310-cp310-win32.whl", hash = 
"sha256:36e0cff201bcb17a0a8ecc7f454fe078437fa6bda730e695a92f2d9932bd507f"}, + {file = "contourpy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:87ddffef1dbe5e669b5c2440b643d3fdd8622a348fe1983fad7a0f0ccb1cd67b"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"}, + {file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"}, + {file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"}, + {file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = "sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"}, + {file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"}, + {file = 
"contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"}, + {file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"}, + {file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a11077e395f67ffc2c44ec2418cfebed032cd6da3022a94fc227b6faf8e2acb8"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8134301d7e204c88ed7ab50028ba06c683000040ede1d617298611f9dc6240c"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e12968fdfd5bb45ffdf6192a590bd8ddd3ba9e58360b29683c6bb71a7b41edca"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd2a0fc506eccaaa7595b7e1418951f213cf8255be2600f1ea1b61e46a60c55f"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfb5c62ce023dfc410d6059c936dcf96442ba40814aefbfa575425a3a7f19dc"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68a32389b06b82c2fdd68276148d7b9275b5f5cf13e5417e4252f6d1a34f72a2"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94e848a6b83da10898cbf1311a815f770acc9b6a3f2d646f330d57eb4e87592e"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d78ab28a03c854a873787a0a42254a0ccb3cb133c672f645c9f9c8f3ae9d0800"}, + {file = "contourpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:81cb5ed4952aae6014bc9d0421dec7c5835c9c8c31cdf51910b708f548cf58e5"}, + {file = "contourpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:14e262f67bd7e6eb6880bc564dcda30b15e351a594657e55b7eec94b6ef72843"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe41b41505a5a33aeaed2a613dccaeaa74e0e3ead6dd6fd3a118fb471644fd6c"}, + {file = 
"contourpy-1.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eca7e17a65f72a5133bdbec9ecf22401c62bcf4821361ef7811faee695799779"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ec4dc6bf570f5b22ed0d7efba0dfa9c5b9e0431aeea7581aa217542d9e809a4"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:00ccd0dbaad6d804ab259820fa7cb0b8036bda0686ef844d24125d8287178ce0"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca947601224119117f7c19c9cdf6b3ab54c5726ef1d906aa4a69dfb6dd58102"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6ec93afeb848a0845a18989da3beca3eec2c0f852322efe21af1931147d12cb"}, + {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"}, ] [package.dependencies] -numpy = ">=1.20" +numpy = ">=1.23" [package.extras] bokeh = ["bokeh", "selenium"] docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] -mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.8.0)", "types-Pillow"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] -test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] +test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" @@ -510,7 +502,6 @@ version = "7.5.3" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, @@ -570,7 +561,7 @@ files = [ tomli = {version = 
"*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli ; python_full_version <= \"3.11.0a6\""] +toml = ["tomli"] [[package]] name = "cycler" @@ -578,7 +569,6 @@ version = "0.12.1" description = "Composable style cycles" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, @@ -590,21 +580,20 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "deprecated" -version = "1.2.14" +version = "1.2.18" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["dev"] +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ - {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, - {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, + {file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"}, + {file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"}, ] [package.dependencies] wrapt = ">=1.10,<2" [package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] [[package]] name = "distlib" @@ -612,7 +601,6 @@ version = "0.3.8" description = "Distribution utilities" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = 
"sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, @@ -624,8 +612,6 @@ version = "1.2.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, @@ -640,7 +626,6 @@ version = "3.14.0" description = "A platform independent file lock." optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, @@ -649,7 +634,7 @@ files = [ [package.extras] docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] -typing = ["typing-extensions (>=4.8) ; python_version < \"3.11\""] +typing = ["typing-extensions (>=4.8)"] [[package]] name = "flair" @@ -657,7 +642,6 @@ version = "0.13.1" description = "A very simple framework for state-of-the-art NLP" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "flair-0.13.1-py3-none-any.whl", hash = "sha256:d38a01bbaf6c708059a3a23f10487e1d2dabb3171fc262fac35d95215821c2cc"}, {file = "flair-0.13.1.tar.gz", hash = "sha256:34f6e99180a2d4b9f24783e6225be0fe8d7ab2d1c6c59dbc1cec4b4eb03ba12d"}, @@ -696,69 +680,76 @@ wikipedia-api = ">=0.5.7" [[package]] name = "fonttools" 
-version = "4.53.1" +version = "4.57.0" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "fonttools-4.53.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0679a30b59d74b6242909945429dbddb08496935b82f91ea9bf6ad240ec23397"}, - {file = "fonttools-4.53.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8bf06b94694251861ba7fdeea15c8ec0967f84c3d4143ae9daf42bbc7717fe3"}, - {file = "fonttools-4.53.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b96cd370a61f4d083c9c0053bf634279b094308d52fdc2dd9a22d8372fdd590d"}, - {file = "fonttools-4.53.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1c7c5aa18dd3b17995898b4a9b5929d69ef6ae2af5b96d585ff4005033d82f0"}, - {file = "fonttools-4.53.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e013aae589c1c12505da64a7d8d023e584987e51e62006e1bb30d72f26522c41"}, - {file = "fonttools-4.53.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9efd176f874cb6402e607e4cc9b4a9cd584d82fc34a4b0c811970b32ba62501f"}, - {file = "fonttools-4.53.1-cp310-cp310-win32.whl", hash = "sha256:c8696544c964500aa9439efb6761947393b70b17ef4e82d73277413f291260a4"}, - {file = "fonttools-4.53.1-cp310-cp310-win_amd64.whl", hash = "sha256:8959a59de5af6d2bec27489e98ef25a397cfa1774b375d5787509c06659b3671"}, - {file = "fonttools-4.53.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:da33440b1413bad53a8674393c5d29ce64d8c1a15ef8a77c642ffd900d07bfe1"}, - {file = "fonttools-4.53.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ff7e5e9bad94e3a70c5cd2fa27f20b9bb9385e10cddab567b85ce5d306ea923"}, - {file = "fonttools-4.53.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6e7170d675d12eac12ad1a981d90f118c06cf680b42a2d74c6c931e54b50719"}, - {file = "fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bee32ea8765e859670c4447b0817514ca79054463b6b79784b08a8df3a4d78e3"}, - {file = "fonttools-4.53.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6e08f572625a1ee682115223eabebc4c6a2035a6917eac6f60350aba297ccadb"}, - {file = "fonttools-4.53.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b21952c092ffd827504de7e66b62aba26fdb5f9d1e435c52477e6486e9d128b2"}, - {file = "fonttools-4.53.1-cp311-cp311-win32.whl", hash = "sha256:9dfdae43b7996af46ff9da520998a32b105c7f098aeea06b2226b30e74fbba88"}, - {file = "fonttools-4.53.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4d0096cb1ac7a77b3b41cd78c9b6bc4a400550e21dc7a92f2b5ab53ed74eb02"}, - {file = "fonttools-4.53.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d92d3c2a1b39631a6131c2fa25b5406855f97969b068e7e08413325bc0afba58"}, - {file = "fonttools-4.53.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3b3c8ebafbee8d9002bd8f1195d09ed2bd9ff134ddec37ee8f6a6375e6a4f0e8"}, - {file = "fonttools-4.53.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32f029c095ad66c425b0ee85553d0dc326d45d7059dbc227330fc29b43e8ba60"}, - {file = "fonttools-4.53.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f5e6c3510b79ea27bb1ebfcc67048cde9ec67afa87c7dd7efa5c700491ac7f"}, - {file = "fonttools-4.53.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f677ce218976496a587ab17140da141557beb91d2a5c1a14212c994093f2eae2"}, - {file = "fonttools-4.53.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9e6ceba2a01b448e36754983d376064730690401da1dd104ddb543519470a15f"}, - {file = "fonttools-4.53.1-cp312-cp312-win32.whl", hash = "sha256:791b31ebbc05197d7aa096bbc7bd76d591f05905d2fd908bf103af4488e60670"}, - {file = "fonttools-4.53.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ed170b5e17da0264b9f6fae86073be3db15fa1bd74061c8331022bca6d09bab"}, - {file = "fonttools-4.53.1-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:c818c058404eb2bba05e728d38049438afd649e3c409796723dfc17cd3f08749"}, - {file = "fonttools-4.53.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:651390c3b26b0c7d1f4407cad281ee7a5a85a31a110cbac5269de72a51551ba2"}, - {file = "fonttools-4.53.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e54f1bba2f655924c1138bbc7fa91abd61f45c68bd65ab5ed985942712864bbb"}, - {file = "fonttools-4.53.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9cd19cf4fe0595ebdd1d4915882b9440c3a6d30b008f3cc7587c1da7b95be5f"}, - {file = "fonttools-4.53.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2af40ae9cdcb204fc1d8f26b190aa16534fcd4f0df756268df674a270eab575d"}, - {file = "fonttools-4.53.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:35250099b0cfb32d799fb5d6c651220a642fe2e3c7d2560490e6f1d3f9ae9169"}, - {file = "fonttools-4.53.1-cp38-cp38-win32.whl", hash = "sha256:f08df60fbd8d289152079a65da4e66a447efc1d5d5a4d3f299cdd39e3b2e4a7d"}, - {file = "fonttools-4.53.1-cp38-cp38-win_amd64.whl", hash = "sha256:7b6b35e52ddc8fb0db562133894e6ef5b4e54e1283dff606fda3eed938c36fc8"}, - {file = "fonttools-4.53.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75a157d8d26c06e64ace9df037ee93a4938a4606a38cb7ffaf6635e60e253b7a"}, - {file = "fonttools-4.53.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4824c198f714ab5559c5be10fd1adf876712aa7989882a4ec887bf1ef3e00e31"}, - {file = "fonttools-4.53.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:becc5d7cb89c7b7afa8321b6bb3dbee0eec2b57855c90b3e9bf5fb816671fa7c"}, - {file = "fonttools-4.53.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ec3fb43befb54be490147b4a922b5314e16372a643004f182babee9f9c3407"}, - {file = "fonttools-4.53.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:73379d3ffdeecb376640cd8ed03e9d2d0e568c9d1a4e9b16504a834ebadc2dfb"}, - {file = "fonttools-4.53.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:02569e9a810f9d11f4ae82c391ebc6fb5730d95a0657d24d754ed7763fb2d122"}, - {file = "fonttools-4.53.1-cp39-cp39-win32.whl", hash = "sha256:aae7bd54187e8bf7fd69f8ab87b2885253d3575163ad4d669a262fe97f0136cb"}, - {file = "fonttools-4.53.1-cp39-cp39-win_amd64.whl", hash = "sha256:e5b708073ea3d684235648786f5f6153a48dc8762cdfe5563c57e80787c29fbb"}, - {file = "fonttools-4.53.1-py3-none-any.whl", hash = "sha256:f1f8758a2ad110bd6432203a344269f445a2907dc24ef6bccfd0ac4e14e0d71d"}, - {file = "fonttools-4.53.1.tar.gz", hash = "sha256:e128778a8e9bc11159ce5447f76766cefbd876f44bd79aff030287254e4752c4"}, +files = [ + {file = "fonttools-4.57.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:babe8d1eb059a53e560e7bf29f8e8f4accc8b6cfb9b5fd10e485bde77e71ef41"}, + {file = "fonttools-4.57.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:81aa97669cd726349eb7bd43ca540cf418b279ee3caba5e2e295fb4e8f841c02"}, + {file = "fonttools-4.57.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0e9618630edd1910ad4f07f60d77c184b2f572c8ee43305ea3265675cbbfe7e"}, + {file = "fonttools-4.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34687a5d21f1d688d7d8d416cb4c5b9c87fca8a1797ec0d74b9fdebfa55c09ab"}, + {file = "fonttools-4.57.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69ab81b66ebaa8d430ba56c7a5f9abe0183afefd3a2d6e483060343398b13fb1"}, + {file = "fonttools-4.57.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d639397de852f2ccfb3134b152c741406752640a266d9c1365b0f23d7b88077f"}, + {file = "fonttools-4.57.0-cp310-cp310-win32.whl", hash = "sha256:cc066cb98b912f525ae901a24cd381a656f024f76203bc85f78fcc9e66ae5aec"}, + {file = "fonttools-4.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7a64edd3ff6a7f711a15bd70b4458611fb240176ec11ad8845ccbab4fe6745db"}, + {file = "fonttools-4.57.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3871349303bdec958360eedb619169a779956503ffb4543bb3e6211e09b647c4"}, + {file = 
"fonttools-4.57.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c59375e85126b15a90fcba3443eaac58f3073ba091f02410eaa286da9ad80ed8"}, + {file = "fonttools-4.57.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967b65232e104f4b0f6370a62eb33089e00024f2ce143aecbf9755649421c683"}, + {file = "fonttools-4.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39acf68abdfc74e19de7485f8f7396fa4d2418efea239b7061d6ed6a2510c746"}, + {file = "fonttools-4.57.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d077f909f2343daf4495ba22bb0e23b62886e8ec7c109ee8234bdbd678cf344"}, + {file = "fonttools-4.57.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:46370ac47a1e91895d40e9ad48effbe8e9d9db1a4b80888095bc00e7beaa042f"}, + {file = "fonttools-4.57.0-cp311-cp311-win32.whl", hash = "sha256:ca2aed95855506b7ae94e8f1f6217b7673c929e4f4f1217bcaa236253055cb36"}, + {file = "fonttools-4.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:17168a4670bbe3775f3f3f72d23ee786bd965395381dfbb70111e25e81505b9d"}, + {file = "fonttools-4.57.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:889e45e976c74abc7256d3064aa7c1295aa283c6bb19810b9f8b604dfe5c7f31"}, + {file = "fonttools-4.57.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0425c2e052a5f1516c94e5855dbda706ae5a768631e9fcc34e57d074d1b65b92"}, + {file = "fonttools-4.57.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44c26a311be2ac130f40a96769264809d3b0cb297518669db437d1cc82974888"}, + {file = "fonttools-4.57.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84c41ba992df5b8d680b89fd84c6a1f2aca2b9f1ae8a67400c8930cd4ea115f6"}, + {file = "fonttools-4.57.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ea1e9e43ca56b0c12440a7c689b1350066595bebcaa83baad05b8b2675129d98"}, + {file = "fonttools-4.57.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:84fd56c78d431606332a0627c16e2a63d243d0d8b05521257d77c6529abe14d8"}, + {file = "fonttools-4.57.0-cp312-cp312-win32.whl", hash = "sha256:f4376819c1c778d59e0a31db5dc6ede854e9edf28bbfa5b756604727f7f800ac"}, + {file = "fonttools-4.57.0-cp312-cp312-win_amd64.whl", hash = "sha256:57e30241524879ea10cdf79c737037221f77cc126a8cdc8ff2c94d4a522504b9"}, + {file = "fonttools-4.57.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:408ce299696012d503b714778d89aa476f032414ae57e57b42e4b92363e0b8ef"}, + {file = "fonttools-4.57.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bbceffc80aa02d9e8b99f2a7491ed8c4a783b2fc4020119dc405ca14fb5c758c"}, + {file = "fonttools-4.57.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f022601f3ee9e1f6658ed6d184ce27fa5216cee5b82d279e0f0bde5deebece72"}, + {file = "fonttools-4.57.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dea5893b58d4637ffa925536462ba626f8a1b9ffbe2f5c272cdf2c6ebadb817"}, + {file = "fonttools-4.57.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dff02c5c8423a657c550b48231d0a48d7e2b2e131088e55983cfe74ccc2c7cc9"}, + {file = "fonttools-4.57.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:767604f244dc17c68d3e2dbf98e038d11a18abc078f2d0f84b6c24571d9c0b13"}, + {file = "fonttools-4.57.0-cp313-cp313-win32.whl", hash = "sha256:8e2e12d0d862f43d51e5afb8b9751c77e6bec7d2dc00aad80641364e9df5b199"}, + {file = "fonttools-4.57.0-cp313-cp313-win_amd64.whl", hash = "sha256:f1d6bc9c23356908db712d282acb3eebd4ae5ec6d8b696aa40342b1d84f8e9e3"}, + {file = "fonttools-4.57.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9d57b4e23ebbe985125d3f0cabbf286efa191ab60bbadb9326091050d88e8213"}, + {file = "fonttools-4.57.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:579ba873d7f2a96f78b2e11028f7472146ae181cae0e4d814a37a09e93d5c5cc"}, + {file = "fonttools-4.57.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:6e3e1ec10c29bae0ea826b61f265ec5c858c5ba2ce2e69a71a62f285cf8e4595"}, + {file = "fonttools-4.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1968f2a2003c97c4ce6308dc2498d5fd4364ad309900930aa5a503c9851aec8"}, + {file = "fonttools-4.57.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:aff40f8ac6763d05c2c8f6d240c6dac4bb92640a86d9b0c3f3fff4404f34095c"}, + {file = "fonttools-4.57.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d07f1b64008e39fceae7aa99e38df8385d7d24a474a8c9872645c4397b674481"}, + {file = "fonttools-4.57.0-cp38-cp38-win32.whl", hash = "sha256:51d8482e96b28fb28aa8e50b5706f3cee06de85cbe2dce80dbd1917ae22ec5a6"}, + {file = "fonttools-4.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:03290e818782e7edb159474144fca11e36a8ed6663d1fcbd5268eb550594fd8e"}, + {file = "fonttools-4.57.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7339e6a3283e4b0ade99cade51e97cde3d54cd6d1c3744459e886b66d630c8b3"}, + {file = "fonttools-4.57.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:05efceb2cb5f6ec92a4180fcb7a64aa8d3385fd49cfbbe459350229d1974f0b1"}, + {file = "fonttools-4.57.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a97bb05eb24637714a04dee85bdf0ad1941df64fe3b802ee4ac1c284a5f97b7c"}, + {file = "fonttools-4.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:541cb48191a19ceb1a2a4b90c1fcebd22a1ff7491010d3cf840dd3a68aebd654"}, + {file = "fonttools-4.57.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:cdef9a056c222d0479a1fdb721430f9efd68268014c54e8166133d2643cb05d9"}, + {file = "fonttools-4.57.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3cf97236b192a50a4bf200dc5ba405aa78d4f537a2c6e4c624bb60466d5b03bd"}, + {file = "fonttools-4.57.0-cp39-cp39-win32.whl", hash = "sha256:e952c684274a7714b3160f57ec1d78309f955c6335c04433f07d36c5eb27b1f9"}, + {file = "fonttools-4.57.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:a2a722c0e4bfd9966a11ff55c895c817158fcce1b2b6700205a376403b546ad9"}, + {file = "fonttools-4.57.0-py3-none-any.whl", hash = "sha256:3122c604a675513c68bd24c6a8f9091f1c2376d18e8f5fe5a101746c81b3e98f"}, + {file = "fonttools-4.57.0.tar.gz", hash = "sha256:727ece10e065be2f9dd239d15dd5d60a66e17eac11aea47d447f9f03fdbc42de"}, ] [package.extras] -all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] +interpolatable = ["munkres", "pycairo", "scipy"] lxml = ["lxml (>=4.0)"] pathops = ["skia-pathops (>=0.5.0)"] plot = ["matplotlib"] repacker = ["uharfbuzz (>=0.23.0)"] symfont = ["sympy"] -type1 = ["xattr ; sys_platform == \"darwin\""] +type1 = ["xattr"] ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] -woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] [[package]] name = "frozenlist" @@ -766,7 +757,6 @@ version = "1.4.1" 
description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, @@ -853,7 +843,6 @@ version = "2023.6.0" description = "File-system specification" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, @@ -889,18 +878,17 @@ tqdm = ["tqdm"] [[package]] name = "ftfy" -version = "6.2.0" +version = "6.3.1" description = "Fixes mojibake and other problems with Unicode, after the fact" optional = false -python-versions = ">=3.8,<4" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "ftfy-6.2.0-py3-none-any.whl", hash = "sha256:f94a2c34b76e07475720e3096f5ca80911d152406fbde66fdb45c4d0c9150026"}, - {file = "ftfy-6.2.0.tar.gz", hash = "sha256:5e42143c7025ef97944ca2619d6b61b0619fc6654f98771d39e862c1424c75c0"}, + {file = "ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083"}, + {file = "ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec"}, ] [package.dependencies] -wcwidth = ">=0.2.12,<0.3.0" +wcwidth = "*" [[package]] name = "gdown" @@ -908,7 +896,6 @@ version = "5.2.0" description = "Google Drive Public File/Folder Downloader" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "gdown-5.2.0-py3-none-any.whl", hash = 
"sha256:33083832d82b1101bdd0e9df3edd0fbc0e1c5f14c9d8c38d2a35bf1683b526d6"}, {file = "gdown-5.2.0.tar.gz", hash = "sha256:2145165062d85520a3cd98b356c9ed522c5e7984d408535409fd46f94defc787"}, @@ -929,7 +916,6 @@ version = "4.3.3" description = "Python framework for fast Vector Space Modelling" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "gensim-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4e72840adfbea35c5804fd559bc0cb6bc9f439926220a37d852b7ce76eb325c1"}, {file = "gensim-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4019263c9d9afae7c669f880c17e09461e77a71afce04ed4d79cf71a4cad2848"}, @@ -976,7 +962,6 @@ version = "0.25.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" -groups = ["main", "dev"] files = [ {file = "huggingface_hub-0.25.2-py3-none-any.whl", hash = "sha256:1897caf88ce7f97fe0110603d8f66ac264e3ba6accdf30cd66cc0fed5282ad25"}, {file = "huggingface_hub-0.25.2.tar.gz", hash = "sha256:a1014ea111a5f40ccd23f7f7ba8ac46e20fa3b658ced1f86a00c75c06ec6423c"}, @@ -1011,7 +996,6 @@ version = "2.5.36" description = "File identification library for Python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, @@ -1026,7 +1010,6 @@ version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" -groups = ["main", "dev"] files = [ {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, @@ -1034,23 +1017,25 @@ files 
= [ [[package]] name = "importlib-resources" -version = "6.4.0" +version = "6.5.2" description = "Read resources from Python packages" optional = false -python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version == \"3.9\"" +python-versions = ">=3.9" files = [ - {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, - {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, + {file = "importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec"}, + {file = "importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c"}, ] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy ; platform_python_implementation != \"PyPy\"", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"] +type = ["pytest-mypy"] [[package]] name = "iniconfig" @@ -1058,7 +1043,6 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = 
"sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -1070,8 +1054,6 @@ version = "2021.4.0" description = "Intel OpenMP* Runtime Library" optional = false python-versions = "*" -groups = ["main", "dev"] -markers = "platform_system == \"Windows\"" files = [ {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"}, {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"}, @@ -1086,7 +1068,6 @@ version = "0.5.0" description = "Japanese morphological analysis engine." optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "Janome-0.5.0-py2.py3-none-any.whl", hash = "sha256:d098670394a77881ce2f6b7d696c0ea5ff74c0c8cf74a8a882159ec82c0e6dc7"}, {file = "Janome-0.5.0.tar.gz", hash = "sha256:ce4a3ed7a4635c2f80139639327d5b1e0381858ad74a3c4a61e8cc83f820400e"}, @@ -1098,7 +1079,6 @@ version = "3.1.4" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" -groups = ["main", "dev"] files = [ {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, @@ -1116,7 +1096,6 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -1128,7 +1107,6 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -1136,116 +1114,125 @@ files = [ [[package]] name = "kiwisolver" -version = "1.4.5" +version = "1.4.7" description = "A fast implementation of the Cassowary constraint solver" optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, - {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, - {file = "kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ef7afcd2d281494c0a9101d5c571970708ad911d028137cd558f02b851c08b4"}, - {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9eaa8b117dc8337728e834b9c6e2611f10c79e38f65157c4c38e9400286f5cb1"}, - {file = 
"kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec20916e7b4cbfb1f12380e46486ec4bcbaa91a9c448b97023fde0d5bbf9e4ff"}, - {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b42c68602539407884cf70d6a480a469b93b81b7701378ba5e2328660c847a"}, - {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa12042de0171fad672b6c59df69106d20d5596e4f87b5e8f76df757a7c399aa"}, - {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a40773c71d7ccdd3798f6489aaac9eee213d566850a9533f8d26332d626b82c"}, - {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:19df6e621f6d8b4b9c4d45f40a66839294ff2bb235e64d2178f7522d9170ac5b"}, - {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:83d78376d0d4fd884e2c114d0621624b73d2aba4e2788182d286309ebdeed770"}, - {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e391b1f0a8a5a10ab3b9bb6afcfd74f2175f24f8975fb87ecae700d1503cdee0"}, - {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:852542f9481f4a62dbb5dd99e8ab7aedfeb8fb6342349a181d4036877410f525"}, - {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59edc41b24031bc25108e210c0def6f6c2191210492a972d585a06ff246bb79b"}, - {file = "kiwisolver-1.4.5-cp310-cp310-win32.whl", hash = "sha256:a6aa6315319a052b4ee378aa171959c898a6183f15c1e541821c5c59beaa0238"}, - {file = "kiwisolver-1.4.5-cp310-cp310-win_amd64.whl", hash = "sha256:d0ef46024e6a3d79c01ff13801cb19d0cad7fd859b15037aec74315540acc276"}, - {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:11863aa14a51fd6ec28688d76f1735f8f69ab1fabf388851a595d0721af042f5"}, - {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:8ab3919a9997ab7ef2fbbed0cc99bb28d3c13e6d4b1ad36e97e482558a91be90"}, - {file = "kiwisolver-1.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fcc700eadbbccbf6bc1bcb9dbe0786b4b1cb91ca0dcda336eef5c2beed37b797"}, - {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfdd7c0b105af050eb3d64997809dc21da247cf44e63dc73ff0fd20b96be55a9"}, - {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76c6a5964640638cdeaa0c359382e5703e9293030fe730018ca06bc2010c4437"}, - {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbea0db94288e29afcc4c28afbf3a7ccaf2d7e027489c449cf7e8f83c6346eb9"}, - {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ceec1a6bc6cab1d6ff5d06592a91a692f90ec7505d6463a88a52cc0eb58545da"}, - {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040c1aebeda72197ef477a906782b5ab0d387642e93bda547336b8957c61022e"}, - {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f91de7223d4c7b793867797bacd1ee53bfe7359bd70d27b7b58a04efbb9436c8"}, - {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:faae4860798c31530dd184046a900e652c95513796ef51a12bc086710c2eec4d"}, - {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0157420efcb803e71d1b28e2c287518b8808b7cf1ab8af36718fd0a2c453eb0"}, - {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:06f54715b7737c2fecdbf140d1afb11a33d59508a47bf11bb38ecf21dc9ab79f"}, - {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fdb7adb641a0d13bdcd4ef48e062363d8a9ad4a182ac7647ec88f695e719ae9f"}, - {file = "kiwisolver-1.4.5-cp311-cp311-win32.whl", hash = 
"sha256:bb86433b1cfe686da83ce32a9d3a8dd308e85c76b60896d58f082136f10bffac"}, - {file = "kiwisolver-1.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c08e1312a9cf1074d17b17728d3dfce2a5125b2d791527f33ffbe805200a355"}, - {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:32d5cf40c4f7c7b3ca500f8985eb3fb3a7dfc023215e876f207956b5ea26632a"}, - {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f846c260f483d1fd217fe5ed7c173fb109efa6b1fc8381c8b7552c5781756192"}, - {file = "kiwisolver-1.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5ff5cf3571589b6d13bfbfd6bcd7a3f659e42f96b5fd1c4830c4cf21d4f5ef45"}, - {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7269d9e5f1084a653d575c7ec012ff57f0c042258bf5db0954bf551c158466e7"}, - {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da802a19d6e15dffe4b0c24b38b3af68e6c1a68e6e1d8f30148c83864f3881db"}, - {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3aba7311af82e335dd1e36ffff68aaca609ca6290c2cb6d821a39aa075d8e3ff"}, - {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763773d53f07244148ccac5b084da5adb90bfaee39c197554f01b286cf869228"}, - {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2270953c0d8cdab5d422bee7d2007f043473f9d2999631c86a223c9db56cbd16"}, - {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d099e745a512f7e3bbe7249ca835f4d357c586d78d79ae8f1dcd4d8adeb9bda9"}, - {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:74db36e14a7d1ce0986fa104f7d5637aea5c82ca6326ed0ec5694280942d1162"}, - {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7e5bab140c309cb3a6ce373a9e71eb7e4873c70c2dda01df6820474f9889d6d4"}, - {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0f114aa76dc1b8f636d077979c0ac22e7cd8f3493abbab152f20eb8d3cda71f3"}, - {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:88a2df29d4724b9237fc0c6eaf2a1adae0cdc0b3e9f4d8e7dc54b16812d2d81a"}, - {file = "kiwisolver-1.4.5-cp312-cp312-win32.whl", hash = "sha256:72d40b33e834371fd330fb1472ca19d9b8327acb79a5821d4008391db8e29f20"}, - {file = "kiwisolver-1.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:2c5674c4e74d939b9d91dda0fae10597ac7521768fec9e399c70a1f27e2ea2d9"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3a2b053a0ab7a3960c98725cfb0bf5b48ba82f64ec95fe06f1d06c99b552e130"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd32d6c13807e5c66a7cbb79f90b553642f296ae4518a60d8d76243b0ad2898"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59ec7b7c7e1a61061850d53aaf8e93db63dce0c936db1fda2658b70e4a1be709"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da4cfb373035def307905d05041c1d06d8936452fe89d464743ae7fb8371078b"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2400873bccc260b6ae184b2b8a4fec0e4082d30648eadb7c3d9a13405d861e89"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1b04139c4236a0f3aff534479b58f6f849a8b351e1314826c2d230849ed48985"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:4e66e81a5779b65ac21764c295087de82235597a2293d18d943f8e9e32746265"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7931d8f1f67c4be9ba1dd9c451fb0eeca1a25b89e4d3f89e828fe12a519b782a"}, - {file = 
"kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:b3f7e75f3015df442238cca659f8baa5f42ce2a8582727981cbfa15fee0ee205"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:bbf1d63eef84b2e8c89011b7f2235b1e0bf7dacc11cac9431fc6468e99ac77fb"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4c380469bd3f970ef677bf2bcba2b6b0b4d5c75e7a020fb863ef75084efad66f"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-win32.whl", hash = "sha256:9408acf3270c4b6baad483865191e3e582b638b1654a007c62e3efe96f09a9a3"}, - {file = "kiwisolver-1.4.5-cp37-cp37m-win_amd64.whl", hash = "sha256:5b94529f9b2591b7af5f3e0e730a4e0a41ea174af35a4fd067775f9bdfeee01a"}, - {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:11c7de8f692fc99816e8ac50d1d1aef4f75126eefc33ac79aac02c099fd3db71"}, - {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:53abb58632235cd154176ced1ae8f0d29a6657aa1aa9decf50b899b755bc2b93"}, - {file = "kiwisolver-1.4.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:88b9f257ca61b838b6f8094a62418421f87ac2a1069f7e896c36a7d86b5d4c29"}, - {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3195782b26fc03aa9c6913d5bad5aeb864bdc372924c093b0f1cebad603dd712"}, - {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc579bf0f502e54926519451b920e875f433aceb4624a3646b3252b5caa9e0b6"}, - {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a580c91d686376f0f7c295357595c5a026e6cbc3d77b7c36e290201e7c11ecb"}, - {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cfe6ab8da05c01ba6fbea630377b5da2cd9bcbc6338510116b01c1bc939a2c18"}, - {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d2e5a98f0ec99beb3c10e13b387f8db39106d53993f498b295f0c914328b1333"}, - {file = 
"kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a51a263952b1429e429ff236d2f5a21c5125437861baeed77f5e1cc2d2c7c6da"}, - {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3edd2fa14e68c9be82c5b16689e8d63d89fe927e56debd6e1dbce7a26a17f81b"}, - {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:74d1b44c6cfc897df648cc9fdaa09bc3e7679926e6f96df05775d4fb3946571c"}, - {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76d9289ed3f7501012e05abb8358bbb129149dbd173f1f57a1bf1c22d19ab7cc"}, - {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:92dea1ffe3714fa8eb6a314d2b3c773208d865a0e0d35e713ec54eea08a66250"}, - {file = "kiwisolver-1.4.5-cp38-cp38-win32.whl", hash = "sha256:5c90ae8c8d32e472be041e76f9d2f2dbff4d0b0be8bd4041770eddb18cf49a4e"}, - {file = "kiwisolver-1.4.5-cp38-cp38-win_amd64.whl", hash = "sha256:c7940c1dc63eb37a67721b10d703247552416f719c4188c54e04334321351ced"}, - {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9407b6a5f0d675e8a827ad8742e1d6b49d9c1a1da5d952a67d50ef5f4170b18d"}, - {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15568384086b6df3c65353820a4473575dbad192e35010f622c6ce3eebd57af9"}, - {file = "kiwisolver-1.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0dc9db8e79f0036e8173c466d21ef18e1befc02de8bf8aa8dc0813a6dc8a7046"}, - {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cdc8a402aaee9a798b50d8b827d7ecf75edc5fb35ea0f91f213ff927c15f4ff0"}, - {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff"}, - {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:955e8513d07a283056b1396e9a57ceddbd272d9252c14f154d450d227606eb54"}, - {file = 
"kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:346f5343b9e3f00b8db8ba359350eb124b98c99efd0b408728ac6ebf38173958"}, - {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9098e0049e88c6a24ff64545cdfc50807818ba6c1b739cae221bbbcbc58aad3"}, - {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:00bd361b903dc4bbf4eb165f24d1acbee754fce22ded24c3d56eec268658a5cf"}, - {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7b8b454bac16428b22560d0a1cf0a09875339cab69df61d7805bf48919415901"}, - {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f1d072c2eb0ad60d4c183f3fb44ac6f73fb7a8f16a2694a91f988275cbf352f9"}, - {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:31a82d498054cac9f6d0b53d02bb85811185bcb477d4b60144f915f3b3126342"}, - {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6512cb89e334e4700febbffaaa52761b65b4f5a3cf33f960213d5656cea36a77"}, - {file = "kiwisolver-1.4.5-cp39-cp39-win32.whl", hash = "sha256:9db8ea4c388fdb0f780fe91346fd438657ea602d58348753d9fb265ce1bca67f"}, - {file = "kiwisolver-1.4.5-cp39-cp39-win_amd64.whl", hash = "sha256:59415f46a37f7f2efeec758353dd2eae1b07640d8ca0f0c42548ec4125492635"}, - {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5c7b3b3a728dc6faf3fc372ef24f21d1e3cee2ac3e9596691d746e5a536de920"}, - {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:620ced262a86244e2be10a676b646f29c34537d0d9cc8eb26c08f53d98013390"}, - {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:378a214a1e3bbf5ac4a8708304318b4f890da88c9e6a07699c4ae7174c09a68d"}, - {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:aaf7be1207676ac608a50cd08f102f6742dbfc70e8d60c4db1c6897f62f71523"}, - {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ba55dce0a9b8ff59495ddd050a0225d58bd0983d09f87cfe2b6aec4f2c1234e4"}, - {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd32ea360bcbb92d28933fc05ed09bffcb1704ba3fc7942e81db0fd4f81a7892"}, - {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7139af55d1688f8b960ee9ad5adafc4ac17c1c473fe07133ac092310d76544"}, - {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dced8146011d2bc2e883f9bd68618b8247387f4bbec46d7392b3c3b032640126"}, - {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9bf3325c47b11b2e51bca0824ea217c7cd84491d8ac4eefd1e409705ef092bd"}, - {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5794cf59533bc3f1b1c821f7206a3617999db9fbefc345360aafe2e067514929"}, - {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e368f200bbc2e4f905b8e71eb38b3c04333bddaa6a2464a6355487b02bb7fb09"}, - {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d706eba36b4c4d5bc6c6377bb6568098765e990cfc21ee16d13963fab7b3e7"}, - {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85267bd1aa8880a9c88a8cb71e18d3d64d2751a790e6ca6c27b8ccc724bcd5ad"}, - {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210ef2c3a1f03272649aff1ef992df2e724748918c4bc2d5a90352849eb40bea"}, - {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:11d011a7574eb3b82bcc9c1a1d35c1d7075677fdd15de527d91b46bd35e935ee"}, - {file = "kiwisolver-1.4.5.tar.gz", hash = 
"sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = 
"kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = 
"kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = 
"kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = 
"sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = 
"kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + 
{file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, ] [[package]] @@ -1254,7 +1241,6 @@ version = "1.0.9" description = "Language detection library ported from Google's language-detection." optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, @@ -1269,7 +1255,6 @@ version = "0.11.2" description = "Lightning toolbox for across the our ecosystem." optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "lightning-utilities-0.11.2.tar.gz", hash = "sha256:adf4cf9c5d912fe505db4729e51d1369c6927f3a8ac55a9dff895ce5c0da08d9"}, {file = "lightning_utilities-0.11.2-py3-none-any.whl", hash = "sha256:541f471ed94e18a28d72879338c8c52e873bb46f4c47644d89228faeb6751159"}, @@ -1287,162 +1272,148 @@ typing = ["mypy (>=1.0.0)", "types-setuptools"] [[package]] name = "lxml" -version = "5.2.2" +version = "5.4.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
optional = false python-versions = ">=3.6" -groups = ["dev"] -files = [ - {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:364d03207f3e603922d0d3932ef363d55bbf48e3647395765f9bfcbdf6d23632"}, - {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50127c186f191b8917ea2fb8b206fbebe87fd414a6084d15568c27d0a21d60db"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4f025ef3db1c6da4460dd27c118d8cd136d0391da4e387a15e48e5c975147"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981a06a3076997adf7c743dcd0d7a0415582661e2517c7d961493572e909aa1d"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aef5474d913d3b05e613906ba4090433c515e13ea49c837aca18bde190853dff"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e275ea572389e41e8b039ac076a46cb87ee6b8542df3fff26f5baab43713bca"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5b65529bb2f21ac7861a0e94fdbf5dc0daab41497d18223b46ee8515e5ad297"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bcc98f911f10278d1daf14b87d65325851a1d29153caaf146877ec37031d5f36"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:b47633251727c8fe279f34025844b3b3a3e40cd1b198356d003aa146258d13a2"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:fbc9d316552f9ef7bba39f4edfad4a734d3d6f93341232a9dddadec4f15d425f"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:13e69be35391ce72712184f69000cda04fc89689429179bc4c0ae5f0b7a8c21b"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3b6a30a9ab040b3f545b697cb3adbf3696c05a3a68aad172e3fd7ca73ab3c835"}, - {file = 
"lxml-5.2.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a233bb68625a85126ac9f1fc66d24337d6e8a0f9207b688eec2e7c880f012ec0"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dfa7c241073d8f2b8e8dbc7803c434f57dbb83ae2a3d7892dd068d99e96efe2c"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a7aca7964ac4bb07680d5c9d63b9d7028cace3e2d43175cb50bba8c5ad33316"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae4073a60ab98529ab8a72ebf429f2a8cc612619a8c04e08bed27450d52103c0"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ffb2be176fed4457e445fe540617f0252a72a8bc56208fd65a690fdb1f57660b"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e290d79a4107d7d794634ce3e985b9ae4f920380a813717adf61804904dc4393"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96e85aa09274955bb6bd483eaf5b12abadade01010478154b0ec70284c1b1526"}, - {file = "lxml-5.2.2-cp310-cp310-win32.whl", hash = "sha256:f956196ef61369f1685d14dad80611488d8dc1ef00be57c0c5a03064005b0f30"}, - {file = "lxml-5.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:875a3f90d7eb5c5d77e529080d95140eacb3c6d13ad5b616ee8095447b1d22e7"}, - {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:45f9494613160d0405682f9eee781c7e6d1bf45f819654eb249f8f46a2c22545"}, - {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0b3f2df149efb242cee2ffdeb6674b7f30d23c9a7af26595099afaf46ef4e88"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d28cb356f119a437cc58a13f8135ab8a4c8ece18159eb9194b0d269ec4e28083"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657a972f46bbefdbba2d4f14413c0d079f9ae243bd68193cb5061b9732fa54c1"}, - {file = 
"lxml-5.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b9ea10063efb77a965a8d5f4182806fbf59ed068b3c3fd6f30d2ac7bee734"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07542787f86112d46d07d4f3c4e7c760282011b354d012dc4141cc12a68cef5f"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:303f540ad2dddd35b92415b74b900c749ec2010e703ab3bfd6660979d01fd4ed"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2eb2227ce1ff998faf0cd7fe85bbf086aa41dfc5af3b1d80867ecfe75fb68df3"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:1d8a701774dfc42a2f0b8ccdfe7dbc140500d1049e0632a611985d943fcf12df"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:56793b7a1a091a7c286b5f4aa1fe4ae5d1446fe742d00cdf2ffb1077865db10d"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eb00b549b13bd6d884c863554566095bf6fa9c3cecb2e7b399c4bc7904cb33b5"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a2569a1f15ae6c8c64108a2cd2b4a858fc1e13d25846be0666fc144715e32ab"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:8cf85a6e40ff1f37fe0f25719aadf443686b1ac7652593dc53c7ef9b8492b115"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d237ba6664b8e60fd90b8549a149a74fcc675272e0e95539a00522e4ca688b04"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0b3f5016e00ae7630a4b83d0868fca1e3d494c78a75b1c7252606a3a1c5fc2ad"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:23441e2b5339bc54dc949e9e675fa35efe858108404ef9aa92f0456929ef6fe8"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb0ba3e8566548d6c8e7dd82a8229ff47bd8fb8c2da237607ac8e5a1b8312e5"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_s390x.whl", 
hash = "sha256:79d1fb9252e7e2cfe4de6e9a6610c7cbb99b9708e2c3e29057f487de5a9eaefa"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6dcc3d17eac1df7859ae01202e9bb11ffa8c98949dcbeb1069c8b9a75917e01b"}, - {file = "lxml-5.2.2-cp311-cp311-win32.whl", hash = "sha256:4c30a2f83677876465f44c018830f608fa3c6a8a466eb223535035fbc16f3438"}, - {file = "lxml-5.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:49095a38eb333aaf44c06052fd2ec3b8f23e19747ca7ec6f6c954ffea6dbf7be"}, - {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7429e7faa1a60cad26ae4227f4dd0459efde239e494c7312624ce228e04f6391"}, - {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:50ccb5d355961c0f12f6cf24b7187dbabd5433f29e15147a67995474f27d1776"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc911208b18842a3a57266d8e51fc3cfaccee90a5351b92079beed912a7914c2"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ce9e786753743159799fdf8e92a5da351158c4bfb6f2db0bf31e7892a1feb5"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec87c44f619380878bd49ca109669c9f221d9ae6883a5bcb3616785fa8f94c97"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08ea0f606808354eb8f2dfaac095963cb25d9d28e27edcc375d7b30ab01abbf6"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75a9632f1d4f698b2e6e2e1ada40e71f369b15d69baddb8968dcc8e683839b18"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74da9f97daec6928567b48c90ea2c82a106b2d500f397eeb8941e47d30b1ca85"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:0969e92af09c5687d769731e3f39ed62427cc72176cebb54b7a9d52cc4fa3b73"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_s390x.whl", 
hash = "sha256:9164361769b6ca7769079f4d426a41df6164879f7f3568be9086e15baca61466"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d26a618ae1766279f2660aca0081b2220aca6bd1aa06b2cf73f07383faf48927"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab67ed772c584b7ef2379797bf14b82df9aa5f7438c5b9a09624dd834c1c1aaf"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3d1e35572a56941b32c239774d7e9ad724074d37f90c7a7d499ab98761bd80cf"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:8268cbcd48c5375f46e000adb1390572c98879eb4f77910c6053d25cc3ac2c67"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e282aedd63c639c07c3857097fc0e236f984ceb4089a8b284da1c526491e3f3d"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfdc2bfe69e9adf0df4915949c22a25b39d175d599bf98e7ddf620a13678585"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4aefd911793b5d2d7a921233a54c90329bf3d4a6817dc465f12ffdfe4fc7b8fe"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8b8df03a9e995b6211dafa63b32f9d405881518ff1ddd775db4e7b98fb545e1c"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f11ae142f3a322d44513de1018b50f474f8f736bc3cd91d969f464b5bfef8836"}, - {file = "lxml-5.2.2-cp312-cp312-win32.whl", hash = "sha256:16a8326e51fcdffc886294c1e70b11ddccec836516a343f9ed0f82aac043c24a"}, - {file = "lxml-5.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:bbc4b80af581e18568ff07f6395c02114d05f4865c2812a1f02f2eaecf0bfd48"}, - {file = "lxml-5.2.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e3d9d13603410b72787579769469af730c38f2f25505573a5888a94b62b920f8"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38b67afb0a06b8575948641c1d6d68e41b83a3abeae2ca9eed2ac59892b36706"}, - {file = 
"lxml-5.2.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c689d0d5381f56de7bd6966a4541bff6e08bf8d3871bbd89a0c6ab18aa699573"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:cf2a978c795b54c539f47964ec05e35c05bd045db5ca1e8366988c7f2fe6b3ce"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:739e36ef7412b2bd940f75b278749106e6d025e40027c0b94a17ef7968d55d56"}, - {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d8bbcd21769594dbba9c37d3c819e2d5847656ca99c747ddb31ac1701d0c0ed9"}, - {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:2304d3c93f2258ccf2cf7a6ba8c761d76ef84948d87bf9664e14d203da2cd264"}, - {file = "lxml-5.2.2-cp36-cp36m-win32.whl", hash = "sha256:02437fb7308386867c8b7b0e5bc4cd4b04548b1c5d089ffb8e7b31009b961dc3"}, - {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, - {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, - {file = 
"lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, - {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, - {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, - {file = "lxml-5.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7ed07b3062b055d7a7f9d6557a251cc655eed0b3152b76de619516621c56f5d3"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f60fdd125d85bf9c279ffb8e94c78c51b3b6a37711464e1f5f31078b45002421"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7e24cb69ee5f32e003f50e016d5fde438010c1022c96738b04fc2423e61706"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23cfafd56887eaed93d07bc4547abd5e09d837a002b791e9767765492a75883f"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19b4e485cd07b7d83e3fe3b72132e7df70bfac22b14fe4bf7a23822c3a35bff5"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:7ce7ad8abebe737ad6143d9d3bf94b88b93365ea30a5b81f6877ec9c0dee0a48"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e49b052b768bb74f58c7dda4e0bdf7b79d43a9204ca584ffe1fb48a6f3c84c66"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:d14a0d029a4e176795cef99c056d58067c06195e0c7e2dbb293bf95c08f772a3"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:be49ad33819d7dcc28a309b86d4ed98e1a65f3075c6acd3cd4fe32103235222b"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a6d17e0370d2516d5bb9062c7b4cb731cff921fc875644c3d751ad857ba9c5b1"}, - {file = "lxml-5.2.2-cp38-cp38-win32.whl", hash = "sha256:5b8c041b6265e08eac8a724b74b655404070b636a8dd6d7a13c3adc07882ef30"}, - {file = "lxml-5.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:f61efaf4bed1cc0860e567d2ecb2363974d414f7f1f124b1df368bbf183453a6"}, - {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb91819461b1b56d06fa4bcf86617fac795f6a99d12239fb0c68dbeba41a0a30"}, - {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d4ed0c7cbecde7194cd3228c044e86bf73e30a23505af852857c09c24e77ec5d"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54401c77a63cc7d6dc4b4e173bb484f28a5607f3df71484709fe037c92d4f0ed"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:625e3ef310e7fa3a761d48ca7ea1f9d8718a32b1542e727d584d82f4453d5eeb"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:519895c99c815a1a24a926d5b60627ce5ea48e9f639a5cd328bda0515ea0f10c"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7079d5eb1c1315a858bbf180000757db8ad904a89476653232db835c3114001"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:343ab62e9ca78094f2306aefed67dcfad61c4683f87eee48ff2fd74902447726"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:cd9e78285da6c9ba2d5c769628f43ef66d96ac3085e59b10ad4f3707980710d3"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = 
"sha256:546cf886f6242dff9ec206331209db9c8e1643ae642dea5fdbecae2453cb50fd"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:02f6a8eb6512fdc2fd4ca10a49c341c4e109aa6e9448cc4859af5b949622715a"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:339ee4a4704bc724757cd5dd9dc8cf4d00980f5d3e6e06d5847c1b594ace68ab"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0a028b61a2e357ace98b1615fc03f76eb517cc028993964fe08ad514b1e8892d"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f90e552ecbad426eab352e7b2933091f2be77115bb16f09f78404861c8322981"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:d83e2d94b69bf31ead2fa45f0acdef0757fa0458a129734f59f67f3d2eb7ef32"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a02d3c48f9bb1e10c7788d92c0c7db6f2002d024ab6e74d6f45ae33e3d0288a3"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d68ce8e7b2075390e8ac1e1d3a99e8b6372c694bbe612632606d1d546794207"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:453d037e09a5176d92ec0fd282e934ed26d806331a8b70ab431a81e2fbabf56d"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:3b019d4ee84b683342af793b56bb35034bd749e4cbdd3d33f7d1107790f8c472"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb3942960f0beb9f46e2a71a3aca220d1ca32feb5a398656be934320804c0df9"}, - {file = "lxml-5.2.2-cp39-cp39-win32.whl", hash = "sha256:ac6540c9fff6e3813d29d0403ee7a81897f1d8ecc09a8ff84d2eea70ede1cdbf"}, - {file = "lxml-5.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:610b5c77428a50269f38a534057444c249976433f40f53e3b47e68349cca1425"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b537bd04d7ccd7c6350cdaaaad911f6312cbd61e6e6045542f781c7f8b2e99d2"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:4820c02195d6dfb7b8508ff276752f6b2ff8b64ae5d13ebe02e7667e035000b9"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a09f6184f17a80897172863a655467da2b11151ec98ba8d7af89f17bf63dae"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76acba4c66c47d27c8365e7c10b3d8016a7da83d3191d053a58382311a8bf4e1"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b128092c927eaf485928cec0c28f6b8bead277e28acf56800e972aa2c2abd7a2"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ae791f6bd43305aade8c0e22f816b34f3b72b6c820477aab4d18473a37e8090b"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a2f6a1bc2460e643785a2cde17293bd7a8f990884b822f7bca47bee0a82fc66b"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e8d351ff44c1638cb6e980623d517abd9f580d2e53bfcd18d8941c052a5a009"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec4bd9133420c5c52d562469c754f27c5c9e36ee06abc169612c959bd7dbb07"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:55ce6b6d803890bd3cc89975fca9de1dff39729b43b73cb15ddd933b8bc20484"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ab6a358d1286498d80fe67bd3d69fcbc7d1359b45b41e74c4a26964ca99c3f8"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:06668e39e1f3c065349c51ac27ae430719d7806c026fec462e5693b08b95696b"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9cd5323344d8ebb9fb5e96da5de5ad4ebab993bbf51674259dbe9d7a18049525"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89feb82ca055af0fe797a2323ec9043b26bc371365847dbe83c7fd2e2f181c34"}, - {file = 
"lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e481bba1e11ba585fb06db666bfc23dbe181dbafc7b25776156120bf12e0d5a6"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d6c6ea6a11ca0ff9cd0390b885984ed31157c168565702959c25e2191674a14"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3d98de734abee23e61f6b8c2e08a88453ada7d6486dc7cdc82922a03968928db"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:69ab77a1373f1e7563e0fb5a29a8440367dec051da6c7405333699d07444f511"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e17913c431f5ae01d8658dbf792fdc457073dcdfbb31dc0cc6ab256e664a8d"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f8757b03208c3f50097761be2dea0aba02e94f0dc7023ed73a7bb14ff11eb0"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a520b4f9974b0a0a6ed73c2154de57cdfd0c8800f4f15ab2b73238ffed0b36e"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5e097646944b66207023bc3c634827de858aebc226d5d4d6d16f0b77566ea182"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b5e4ef22ff25bfd4ede5f8fb30f7b24446345f3e79d9b7455aef2836437bc38a"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff69a9a0b4b17d78170c73abe2ab12084bdf1691550c5629ad1fe7849433f324"}, - {file = "lxml-5.2.2.tar.gz", hash = "sha256:bb2dc4898180bea79863d5487e5f9c7c34297414bad54bcd0f0852aee9cfdb87"}, +files = [ + {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c"}, + {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7"}, + {file = 
"lxml-5.4.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:696ea9e87442467819ac22394ca36cb3d01848dad1be6fac3fb612d3bd5a12cf"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef80aeac414f33c24b3815ecd560cee272786c3adfa5f31316d8b349bfade28"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b9c2754cef6963f3408ab381ea55f47dabc6f78f4b8ebb0f0b25cf1ac1f7609"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a62cc23d754bb449d63ff35334acc9f5c02e6dae830d78dab4dd12b78a524f4"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f82125bc7203c5ae8633a7d5d20bcfdff0ba33e436e4ab0abc026a53a8960b7"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b67319b4aef1a6c56576ff544b67a2a6fbd7eaee485b241cabf53115e8908b8f"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:a8ef956fce64c8551221f395ba21d0724fed6b9b6242ca4f2f7beb4ce2f41997"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:0a01ce7d8479dce84fc03324e3b0c9c90b1ece9a9bb6a1b6c9025e7e4520e78c"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91505d3ddebf268bb1588eb0f63821f738d20e1e7f05d3c647a5ca900288760b"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a3bcdde35d82ff385f4ede021df801b5c4a5bcdfb61ea87caabcebfc4945dc1b"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aea7c06667b987787c7d1f5e1dfcd70419b711cdb47d6b4bb4ad4b76777a0563"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:a7fb111eef4d05909b82152721a59c1b14d0f365e2be4c742a473c5d7372f4f5"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:43d549b876ce64aa18b2328faff70f5877f8c6dede415f80a2f799d31644d776"}, + {file = "lxml-5.4.0-cp310-cp310-win32.whl", hash = "sha256:75133890e40d229d6c5837b0312abbe5bac1c342452cf0e12523477cd3aa21e7"}, + {file = "lxml-5.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:de5b4e1088523e2b6f730d0509a9a813355b7f5659d70eb4f319c76beea2e250"}, + {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9"}, + {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751"}, + {file = "lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4"}, + {file = "lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539"}, + {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4"}, + {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc"}, + {file = "lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f"}, + {file = "lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2"}, + {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0"}, + {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76"}, + {file = 
"lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a"}, + {file = "lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82"}, + {file = "lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f"}, + {file = "lxml-5.4.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7be701c24e7f843e6788353c055d806e8bd8466b52907bafe5d13ec6a6dbaecd"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb54f7c6bafaa808f27166569b1511fc42701a7713858dddc08afdde9746849e"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97dac543661e84a284502e0cf8a67b5c711b0ad5fb661d1bd505c02f8cf716d7"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:c70e93fba207106cb16bf852e421c37bbded92acd5964390aad07cb50d60f5cf"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9c886b481aefdf818ad44846145f6eaf373a20d200b5ce1a5c8e1bc2d8745410"}, + {file = "lxml-5.4.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:fa0e294046de09acd6146be0ed6727d1f42ded4ce3ea1e9a19c11b6774eea27c"}, + {file = "lxml-5.4.0-cp36-cp36m-win32.whl", hash = "sha256:61c7bbf432f09ee44b1ccaa24896d21075e533cd01477966a5ff5a71d88b2f56"}, + {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, + {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", 
hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, + {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, + {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, + {file = "lxml-5.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eaf24066ad0b30917186420d51e2e3edf4b0e2ea68d8cd885b14dc8afdcf6556"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b31a3a77501d86d8ade128abb01082724c0dfd9524f542f2f07d693c9f1175f"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e108352e203c7afd0eb91d782582f00a0b16a948d204d4dec8565024fafeea5"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11a96c3b3f7551c8a8109aa65e8594e551d5a84c76bf950da33d0fb6dfafab7"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:ca755eebf0d9e62d6cb013f1261e510317a41bf4650f22963474a663fdfe02aa"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:4cd915c0fb1bed47b5e6d6edd424ac25856252f09120e3e8ba5154b6b921860e"}, + {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:226046e386556a45ebc787871d6d2467b32c37ce76c2680f5c608e25823ffc84"}, + {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b108134b9667bcd71236c5a02aad5ddd073e372fb5d48ea74853e009fe38acb6"}, + {file = "lxml-5.4.0-cp38-cp38-win32.whl", hash = "sha256:1320091caa89805df7dcb9e908add28166113dcd062590668514dbd510798c88"}, + {file = "lxml-5.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:073eb6dcdf1f587d9b88c8c93528b57eccda40209cf9be549d469b942b41d70b"}, + {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bda3ea44c39eb74e2488297bb39d47186ed01342f0022c8ff407c250ac3f498e"}, + {file = 
"lxml-5.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9ceaf423b50ecfc23ca00b7f50b64baba85fb3fb91c53e2c9d00bc86150c7e40"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:664cdc733bc87449fe781dbb1f309090966c11cc0c0cd7b84af956a02a8a4729"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67ed8a40665b84d161bae3181aa2763beea3747f748bca5874b4af4d75998f87"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b4a3bd174cc9cdaa1afbc4620c049038b441d6ba07629d89a83b408e54c35cd"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b0989737a3ba6cf2a16efb857fb0dfa20bc5c542737fddb6d893fde48be45433"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:dc0af80267edc68adf85f2a5d9be1cdf062f973db6790c1d065e45025fa26140"}, + {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:639978bccb04c42677db43c79bdaa23785dc7f9b83bfd87570da8207872f1ce5"}, + {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a99d86351f9c15e4a901fc56404b485b1462039db59288b203f8c629260a142"}, + {file = "lxml-5.4.0-cp39-cp39-win32.whl", hash = "sha256:3e6d5557989cdc3ebb5302bbdc42b439733a841891762ded9514e74f60319ad6"}, + {file = "lxml-5.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8c9b7f16b63e65bbba889acb436a1034a82d34fa09752d754f88d708eca80e1"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1b717b00a71b901b4667226bba282dd462c42ccf618ade12f9ba3674e1fabc55"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27a9ded0f0b52098ff89dd4c418325b987feed2ea5cc86e8860b0f844285d740"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7ce10634113651d6f383aa712a194179dcd496bd8c41e191cec2099fa09de5"}, + {file = 
"lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53370c26500d22b45182f98847243efb518d268374a9570409d2e2276232fd37"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6364038c519dffdbe07e3cf42e6a7f8b90c275d4d1617a69bb59734c1a2d571"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b12cb6527599808ada9eb2cd6e0e7d3d8f13fe7bbb01c6311255a15ded4c7ab4"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5f11a1526ebd0dee85e7b1e39e39a0cc0d9d03fb527f56d8457f6df48a10dc0c"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48b4afaf38bf79109bb060d9016fad014a9a48fb244e11b94f74ae366a64d252"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de6f6bb8a7840c7bf216fb83eec4e2f79f7325eca8858167b68708b929ab2172"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5cca36a194a4eb4e2ed6be36923d3cffd03dcdf477515dea687185506583d4c9"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b7c86884ad23d61b025989d99bfdd92a7351de956e01c61307cb87035960bcb1"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:53d9469ab5460402c19553b56c3648746774ecd0681b1b27ea74d5d8a3ef5590"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:56dbdbab0551532bb26c19c914848d7251d73edb507c3079d6805fa8bba5b706"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14479c2ad1cb08b62bb941ba8e0e05938524ee3c3114644df905d2331c76cd57"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32697d2ea994e0db19c1df9e40275ffe84973e4232b5c274f47e7c1ec9763cdd"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = 
"sha256:24f6df5f24fc3385f622c0c9d63fe34604893bc1a5bdbb2dbf5870f85f9a404a"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:151d6c40bc9db11e960619d2bf2ec5829f0aaffb10b41dcf6ad2ce0f3c0b2325"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4025bf2884ac4370a3243c5aa8d66d3cb9e15d3ddd0af2d796eccc5f0244390e"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9459e6892f59ecea2e2584ee1058f5d8f629446eab52ba2305ae13a32a059530"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47fb24cc0f052f0576ea382872b3fc7e1f7e3028e53299ea751839418ade92a6"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50441c9de951a153c698b9b99992e806b71c1f36d14b154592580ff4a9d0d877"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ab339536aa798b1e17750733663d272038bf28069761d5be57cb4a9b0137b4f8"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9776af1aad5a4b4a1317242ee2bea51da54b2a7b7b48674be736d463c999f37d"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:63e7968ff83da2eb6fdda967483a7a023aa497d85ad8f05c3ad9b1f2e8c84987"}, + {file = "lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd"}, ] [package.extras] cssselect = ["cssselect (>=0.7)"] -html-clean = ["lxml-html-clean"] +html-clean = ["lxml_html_clean"] html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] -source = ["Cython (>=3.0.10)"] +source = ["Cython (>=3.0.11,<3.1.0)"] [[package]] name = "markupsafe" @@ -1450,7 +1421,6 @@ version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" -groups = ["main", "dev"] files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1520,7 +1490,6 @@ version = "3.9.4" description = "Python plotting package" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "matplotlib-3.9.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:c5fdd7abfb706dfa8d307af64a87f1a862879ec3cd8d0ec8637458f0885b9c50"}, {file = "matplotlib-3.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d89bc4e85e40a71d1477780366c27fb7c6494d293e1617788986f74e2a03d7ff"}, @@ -1586,8 +1555,6 @@ version = "2021.4.0" description = "IntelĀ® oneAPI Math Kernel Library" optional = false python-versions = "*" -groups = ["main", "dev"] -markers = "platform_system == \"Windows\"" files = [ {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"}, {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"}, @@ -1602,14 +1569,13 @@ tbb = "==2021.*" [[package]] name = "more-itertools" -version = "10.3.0" +version = "10.7.0" description = "More routines for operating on iterables, beyond itertools" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "more-itertools-10.3.0.tar.gz", hash = "sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463"}, - {file = "more_itertools-10.3.0-py3-none-any.whl", hash = "sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320"}, + {file = "more_itertools-10.7.0-py3-none-any.whl", hash = 
"sha256:d43980384673cb07d2f7d2d918c616b30c659c089ee23953f601d6609c67510e"}, + {file = "more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3"}, ] [[package]] @@ -1618,7 +1584,6 @@ version = "0.5.10" description = "D3 Viewer for Matplotlib" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "mpld3-0.5.10-py3-none-any.whl", hash = "sha256:80877acce87ea447380fad7374668737505c8c0684aab05238e7c5dc1fab38c1"}, {file = "mpld3-0.5.10.tar.gz", hash = "sha256:a478eb404fa5212505c59133cf272cd9a94105872e605597720e7f84de38fbc7"}, @@ -1634,7 +1599,6 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" -groups = ["main", "dev"] files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1643,7 +1607,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] +gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] [[package]] @@ -1652,7 +1616,6 @@ version = "6.0.5" description = "multidict implementation" optional = false python-versions = ">=3.7" -groups = ["main"] files = [ {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, @@ -1752,7 +1715,6 @@ version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.9" -groups = ["main", "dev"] files = [ {file = 
"networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, @@ -1767,14 +1729,13 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] name = "nltk" -version = "3.8.1" +version = "3.9.1" description = "Natural Language Toolkit" optional = false -python-versions = ">=3.7" -groups = ["dev"] +python-versions = ">=3.8" files = [ - {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, - {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, + {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, + {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, ] [package.dependencies] @@ -1797,7 +1758,6 @@ version = "1.9.0" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev"] files = [ {file = "nodeenv-1.9.0-py2.py3-none-any.whl", hash = "sha256:508ecec98f9f3330b636d4448c0f1a56fc68017c68f1e7857ebc52acf0eb879a"}, {file = "nodeenv-1.9.0.tar.gz", hash = "sha256:07f144e90dae547bf0d4ee8da0ee42664a42a04e02ed68e06324348dafe4bdb1"}, @@ -1809,7 +1769,6 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" -groups = ["main", "dev"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -1855,7 +1814,6 @@ version = "24.0" description = "Core 
utilities for Python packages" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] files = [ {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, @@ -1863,41 +1821,53 @@ files = [ [[package]] name = "pandas" -version = "2.2.2" +version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, - {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, - {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, - {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, - {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, - {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, - {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, - {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, - {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, - {file = 
"pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, - {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, - {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, - {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, - {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, - {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, - {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, - {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, - {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, - {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, - {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, - {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, - {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, - {file = 
"pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, - {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, - {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, - {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, - {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, - {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, - {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = 
"sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, ] [package.dependencies] @@ -1937,116 +1907,116 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pie-core" -version = "0.2.0" +version = "0.1.2" description = "Core modules of PyTorch-IE" optional = false python-versions = "<4.0,>=3.9" -groups = ["main"] files = [ - {file = "pie_core-0.2.0-py3-none-any.whl", hash = "sha256:8bb9740304c9b58246485c9170717d2b947d732d71d2a9d5176f8572d8b98b2a"}, - {file = "pie_core-0.2.0.tar.gz", hash = "sha256:7e41cbe6b554285acbcae8822d84d97c19820ddd7b27dc9751349eb48bc61eb1"}, + {file = "pie_core-0.1.2-py3-none-any.whl", hash = 
"sha256:2118e7e0d7002182e3c071e7806eece73ce7d68789cf6a0b8a3a718cf3eb910d"}, + {file = "pie_core-0.1.2.tar.gz", hash = "sha256:08c883a2e7618f0bfdeaa428a51a95753cbf3a9b12eb5cca1ab3263d98177303"}, ] [package.dependencies] huggingface_hub = ">=0.23.4,<0.26.0" -torch = ">=2.1.0,<3.0.0" +pytorch-lightning = ">=2.1.0,<3.0.0" [[package]] name = "pillow" -version = "10.4.0" +version = "11.2.1" description = "Python Imaging Library (Fork)" optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, - {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, - {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, - {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, - {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, - {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, - {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, - {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, - {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, - {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, - 
{file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, - {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, - {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, - {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, - {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, - {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, - {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, - {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, - {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, - {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, - {file = "pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, - {file = "pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, - {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, - {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, - {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, - {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, - {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, - {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, - {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, - {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, - {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, - {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, - {file = "pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, - {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, - {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, - {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, - {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, - {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, - {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, - {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, - {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, - {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, - {file = "pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, - {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, - {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, - {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, - {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, - {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, - {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, - {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, - {file = 
"pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, - {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, - {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, - {file = "pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, - {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, - {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, - {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, - {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, - {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, - {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, - {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, - {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, - {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, - {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, - {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, - {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, - {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, - {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, - {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, - {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, - {file = 
"pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, - {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, - {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, - {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, +python-versions = ">=3.9" +files = [ + {file = "pillow-11.2.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:d57a75d53922fc20c165016a20d9c44f73305e67c351bbc60d1adaf662e74047"}, + {file = "pillow-11.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:127bf6ac4a5b58b3d32fc8289656f77f80567d65660bc46f72c0d77e6600cc95"}, + {file = "pillow-11.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4ba4be812c7a40280629e55ae0b14a0aafa150dd6451297562e1764808bbe61"}, + {file = "pillow-11.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8bd62331e5032bc396a93609982a9ab6b411c05078a52f5fe3cc59234a3abd1"}, + {file = "pillow-11.2.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:562d11134c97a62fe3af29581f083033179f7ff435f78392565a1ad2d1c2c45c"}, + {file = "pillow-11.2.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c97209e85b5be259994eb5b69ff50c5d20cca0f458ef9abd835e262d9d88b39d"}, + {file = "pillow-11.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0c3e6d0f59171dfa2e25d7116217543310908dfa2770aa64b8f87605f8cacc97"}, + {file = "pillow-11.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc1c3bc53befb6096b84165956e886b1729634a799e9d6329a0c512ab651e579"}, + {file = "pillow-11.2.1-cp310-cp310-win32.whl", hash = "sha256:312c77b7f07ab2139924d2639860e084ec2a13e72af54d4f08ac843a5fc9c79d"}, + {file = "pillow-11.2.1-cp310-cp310-win_amd64.whl", 
hash = "sha256:9bc7ae48b8057a611e5fe9f853baa88093b9a76303937449397899385da06fad"}, + {file = "pillow-11.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:2728567e249cdd939f6cc3d1f049595c66e4187f3c34078cbc0a7d21c47482d2"}, + {file = "pillow-11.2.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35ca289f712ccfc699508c4658a1d14652e8033e9b69839edf83cbdd0ba39e70"}, + {file = "pillow-11.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0409af9f829f87a2dfb7e259f78f317a5351f2045158be321fd135973fff7bf"}, + {file = "pillow-11.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4e5c5edee874dce4f653dbe59db7c73a600119fbea8d31f53423586ee2aafd7"}, + {file = "pillow-11.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b93a07e76d13bff9444f1a029e0af2964e654bfc2e2c2d46bfd080df5ad5f3d8"}, + {file = "pillow-11.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:e6def7eed9e7fa90fde255afaf08060dc4b343bbe524a8f69bdd2a2f0018f600"}, + {file = "pillow-11.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:8f4f3724c068be008c08257207210c138d5f3731af6c155a81c2b09a9eb3a788"}, + {file = "pillow-11.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a0a6709b47019dff32e678bc12c63008311b82b9327613f534e496dacaefb71e"}, + {file = "pillow-11.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f6b0c664ccb879109ee3ca702a9272d877f4fcd21e5eb63c26422fd6e415365e"}, + {file = "pillow-11.2.1-cp311-cp311-win32.whl", hash = "sha256:cc5d875d56e49f112b6def6813c4e3d3036d269c008bf8aef72cd08d20ca6df6"}, + {file = "pillow-11.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:0f5c7eda47bf8e3c8a283762cab94e496ba977a420868cb819159980b6709193"}, + {file = "pillow-11.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:4d375eb838755f2528ac8cbc926c3e31cc49ca4ad0cf79cff48b20e30634a4a7"}, + {file = "pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f"}, + {file 
= "pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b"}, + {file = "pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d"}, + {file = "pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4"}, + {file = "pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d"}, + {file = "pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4"}, + {file = "pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443"}, + {file = "pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c"}, + {file = "pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3"}, + {file = "pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941"}, + {file = "pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb"}, + {file = "pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28"}, + {file = "pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830"}, + {file = "pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0"}, + {file = 
"pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1"}, + {file = "pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f"}, + {file = "pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155"}, + {file = "pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14"}, + {file = "pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b"}, + {file = "pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2"}, + {file = "pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691"}, + {file = "pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c"}, + {file = "pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22"}, + {file = "pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7"}, + {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16"}, + {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b"}, + {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406"}, + {file = 
"pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91"}, + {file = "pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751"}, + {file = "pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9"}, + {file = "pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd"}, + {file = "pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e"}, + {file = "pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681"}, + {file = "pillow-11.2.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:7491cf8a79b8eb867d419648fff2f83cb0b3891c8b36da92cc7f1931d46108c8"}, + {file = "pillow-11.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b02d8f9cb83c52578a0b4beadba92e37d83a4ef11570a8688bbf43f4ca50909"}, + {file = "pillow-11.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:014ca0050c85003620526b0ac1ac53f56fc93af128f7546623cc8e31875ab928"}, + {file = "pillow-11.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3692b68c87096ac6308296d96354eddd25f98740c9d2ab54e1549d6c8aea9d79"}, + {file = "pillow-11.2.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:f781dcb0bc9929adc77bad571b8621ecb1e4cdef86e940fe2e5b5ee24fd33b35"}, + {file = "pillow-11.2.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:2b490402c96f907a166615e9a5afacf2519e28295f157ec3a2bb9bd57de638cb"}, + {file = "pillow-11.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dd6b20b93b3ccc9c1b597999209e4bc5cf2853f9ee66e3fc9a400a78733ffc9a"}, + {file = "pillow-11.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:4b835d89c08a6c2ee7781b8dd0a30209a8012b5f09c0a665b65b0eb3560b6f36"}, + {file = "pillow-11.2.1-cp39-cp39-win32.whl", hash = "sha256:b10428b3416d4f9c61f94b494681280be7686bda15898a3a9e08eb66a6d92d67"}, + {file = "pillow-11.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:6ebce70c3f486acf7591a3d73431fa504a4e18a9b97ff27f5f47b7368e4b9dd1"}, + {file = "pillow-11.2.1-cp39-cp39-win_arm64.whl", hash = "sha256:c27476257b2fdcd7872d54cfd119b3a9ce4610fb85c8e32b70b42e3680a29a1e"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9b7b0d4fd2635f54ad82785d56bc0d94f147096493a79985d0ab57aedd563156"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:aa442755e31c64037aa7c1cb186e0b369f8416c567381852c63444dd666fb772"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0d3348c95b766f54b76116d53d4cb171b52992a1027e7ca50c81b43b9d9e363"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85d27ea4c889342f7e35f6d56e7e1cb345632ad592e8c51b693d7b7556043ce0"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bf2c33d6791c598142f00c9c4c7d47f6476731c31081331664eb26d6ab583e01"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e616e7154c37669fc1dfc14584f11e284e05d1c650e1c0f972f281c4ccc53193"}, + {file = "pillow-11.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:39ad2e0f424394e3aebc40168845fee52df1394a4673a6ee512d840d14ab3013"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:80f1df8dbe9572b4b7abdfa17eb5d78dd620b1d55d9e25f834efdbee872d3aed"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ea926cfbc3957090becbcbbb65ad177161a2ff2ad578b5a6ec9bb1e1cd78753c"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:738db0e0941ca0376804d4de6a782c005245264edaa253ffce24e5a15cbdc7bd"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db98ab6565c69082ec9b0d4e40dd9f6181dab0dd236d26f7a50b8b9bfbd5076"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:036e53f4170e270ddb8797d4c590e6dd14d28e15c7da375c18978045f7e6c37b"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:14f73f7c291279bd65fda51ee87affd7c1e097709f7fdd0188957a16c264601f"}, + {file = "pillow-11.2.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:208653868d5c9ecc2b327f9b9ef34e0e42a4cdd172c2988fd81d62d2bc9bc044"}, + {file = "pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6"}, ] [package.extras] -docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] fpx = ["olefile"] mic = ["olefile"] -tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions ; python_version < \"3.10\""] +test-arrow = ["pyarrow"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] +typing = ["typing-extensions"] xmp = ["defusedxml"] [[package]] @@ -2055,7 +2025,6 @@ version = "4.2.2" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, @@ -2072,7 +2041,6 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2088,7 +2056,6 @@ version = "3.1" description = "Pretty print trees" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "pptree-3.1.tar.gz", hash = "sha256:4dd0ba2f58000cbd29d68a5b64bac29bcb5a663642f79404877c0059668a69f6"}, ] @@ -2099,7 +2066,6 @@ version = "3.7.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "pre_commit-3.7.1-py2.py3-none-any.whl", hash = "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5"}, {file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"}, @@ -2114,65 +2080,54 @@ virtualenv = ">=20.10.0" [[package]] name = "protobuf" -version = "5.27.2" +version = "6.30.2" description = "" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "protobuf-5.27.2-cp310-abi3-win32.whl", hash = "sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38"}, - {file = "protobuf-5.27.2-cp310-abi3-win_amd64.whl", hash = "sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505"}, - {file = "protobuf-5.27.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5"}, - {file = "protobuf-5.27.2-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b"}, - {file = "protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e"}, - {file = "protobuf-5.27.2-cp38-cp38-win32.whl", hash = "sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863"}, - {file = "protobuf-5.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6"}, - {file = "protobuf-5.27.2-cp39-cp39-win32.whl", hash = "sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca"}, - {file = "protobuf-5.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce"}, - {file = "protobuf-5.27.2-py3-none-any.whl", hash = "sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470"}, - {file = "protobuf-5.27.2.tar.gz", hash = 
"sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714"}, + {file = "protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103"}, + {file = "protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9"}, + {file = "protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b"}, + {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815"}, + {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d"}, + {file = "protobuf-6.30.2-cp39-cp39-win32.whl", hash = "sha256:524afedc03b31b15586ca7f64d877a98b184f007180ce25183d1a5cb230ee72b"}, + {file = "protobuf-6.30.2-cp39-cp39-win_amd64.whl", hash = "sha256:acec579c39c88bd8fbbacab1b8052c793efe83a0a5bd99db4a31423a25c0a0e2"}, + {file = "protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51"}, + {file = "protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048"}, ] [[package]] name = "psutil" -version = "6.0.0" -description = "Cross-platform lib for process and system monitoring in Python." +version = "7.0.0" +description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -groups = ["dev"] -files = [ - {file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"}, - {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"}, - {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c"}, - {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3"}, - {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c"}, - {file = "psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35"}, - {file = "psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1"}, - {file = "psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0"}, - {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0"}, - {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd"}, - {file = "psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132"}, - {file = "psutil-6.0.0-cp36-cp36m-win32.whl", hash = "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14"}, - {file = 
"psutil-6.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c"}, - {file = "psutil-6.0.0-cp37-abi3-win32.whl", hash = "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d"}, - {file = "psutil-6.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3"}, - {file = "psutil-6.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0"}, - {file = "psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2"}, +python-versions = ">=3.6" +files = [ + {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, + {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, + {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, + {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, + {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, + {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = 
"sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, + {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, ] [package.extras] -test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] +dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "pyparsing" -version = "3.1.2" +version = "3.2.3" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false -python-versions = ">=3.6.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, - {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, + {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"}, + {file = "pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"}, ] [package.extras] @@ -2184,7 +2139,6 @@ version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["dev"] files = [ {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, @@ -2197,7 +2151,6 @@ version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, @@ -2220,7 +2173,6 @@ version = "4.1.0" description = "Pytest plugin for measuring coverage." optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, @@ -2239,7 +2191,6 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "dev"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2248,47 +2199,12 @@ files = [ [package.dependencies] six = ">=1.5" -[[package]] -name = "pytorch-crf" -version = "0.7.2" -description = "Conditional random field in PyTorch" -optional = false -python-versions = ">=3.6, <4" -groups = ["dev"] -files = [ - {file = "pytorch-crf-0.7.2.tar.gz", hash = 
"sha256:e6456e22ccfc99a3d4fe1e03e996103b1b39e9830bf3c7e12e7a9077d3be866d"}, - {file = "pytorch_crf-0.7.2-py3-none-any.whl", hash = "sha256:1b2d7d5eea3255f6e0cac09ab8b645472e76ff70d9333bc88762cf7317a4992d"}, -] - -[[package]] -name = "pytorch-ie" -version = "0.31.9" -description = "State-of-the-art Information Extraction in PyTorch" -optional = false -python-versions = "<4.0,>=3.9" -groups = ["main"] -files = [ - {file = "pytorch_ie-0.31.9-py3-none-any.whl", hash = "sha256:002eab323d529022e13a1ed1a7effc43e1bc172bcc11abe58c46501a8c37eb54"}, - {file = "pytorch_ie-0.31.9.tar.gz", hash = "sha256:bd516817ce759c059fcbe61c8d420367366c82014d3ba49938a61cb564102610"}, -] - -[package.dependencies] -absl-py = ">=1.0.0,<2.0.0" -fsspec = "<2023.9.0" -pandas = ">=2.0.0,<3.0.0" -pie-core = ">=0.2.0,<0.3.0" -pytorch-lightning = ">=2,<3" -torch = ">=1.10" -torchmetrics = ">=1,<2" -transformers = ">=4.18,<5.0" - [[package]] name = "pytorch-lightning" version = "2.2.5" description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." 
optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "pytorch-lightning-2.2.5.tar.gz", hash = "sha256:8d06d0166e2204f82864f5d2b53a367c2c375d9cd5a7f6174434b2dffeaef7e9"}, {file = "pytorch_lightning-2.2.5-py3-none-any.whl", hash = "sha256:67a7800863326914f68f6afd68f427855ef2315b4f00d554be8ea4c0f0557fd8"}, @@ -2306,12 +2222,12 @@ tqdm = ">=4.57.0" typing-extensions = ">=4.4.0" [package.extras] -all = ["bitsandbytes (==0.41.0)", "deepspeed (>=0.8.2,<=0.9.3) ; platform_system != \"Windows\"", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.15.0)", "jsonargparse[signatures] (>=4.27.7)", "lightning-utilities (>=0.8.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "requests (<2.32.0)", "rich (>=12.3.0)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)"] -deepspeed = ["deepspeed (>=0.8.2,<=0.9.3) ; platform_system != \"Windows\""] -dev = ["bitsandbytes (==0.41.0)", "cloudpickle (>=1.3)", "coverage (==7.3.1)", "deepspeed (>=0.8.2,<=0.9.3) ; platform_system != \"Windows\"", "fastapi", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.15.0)", "jsonargparse[signatures] (>=4.27.7)", "lightning-utilities (>=0.8.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "onnx (>=0.14.0)", "onnxruntime (>=0.15.0)", "pandas (>1.0)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "requests (<2.32.0)", "rich (>=12.3.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)", "uvicorn"] +all = ["bitsandbytes (==0.41.0)", "deepspeed (>=0.8.2,<=0.9.3)", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.15.0)", "jsonargparse[signatures] (>=4.27.7)", "lightning-utilities (>=0.8.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "requests (<2.32.0)", "rich (>=12.3.0)", "tensorboardX 
(>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)"] +deepspeed = ["deepspeed (>=0.8.2,<=0.9.3)"] +dev = ["bitsandbytes (==0.41.0)", "cloudpickle (>=1.3)", "coverage (==7.3.1)", "deepspeed (>=0.8.2,<=0.9.3)", "fastapi", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.15.0)", "jsonargparse[signatures] (>=4.27.7)", "lightning-utilities (>=0.8.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "onnx (>=0.14.0)", "onnxruntime (>=0.15.0)", "pandas (>1.0)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "requests (<2.32.0)", "rich (>=12.3.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)", "uvicorn"] examples = ["gym[classic-control] (>=0.17.0)", "ipython[all] (<8.15.0)", "lightning-utilities (>=0.8.0)", "requests (<2.32.0)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)"] extra = ["bitsandbytes (==0.41.0)", "hydra-core (>=1.0.5)", "jsonargparse[signatures] (>=4.27.7)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "rich (>=12.3.0)", "tensorboardX (>=2.2)"] -strategies = ["deepspeed (>=0.8.2,<=0.9.3) ; platform_system != \"Windows\""] +strategies = ["deepspeed (>=0.8.2,<=0.9.3)"] test = ["cloudpickle (>=1.3)", "coverage (==7.3.1)", "fastapi", "onnx (>=0.14.0)", "onnxruntime (>=0.15.0)", "pandas (>1.0)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "uvicorn"] [[package]] @@ -2320,7 +2236,6 @@ version = "0.2.0" description = "A pytorch module (and function) to reverse gradients." 
optional = false python-versions = ">=3.5" -groups = ["dev"] files = [ {file = "pytorch_revgrad-0.2.0-py3-none-any.whl", hash = "sha256:2276fb189b2ce26f756a97effe2a6bcf8f7fdc60542c5dfb45c53f09ef123aa7"}, {file = "pytorch_revgrad-0.2.0.tar.gz", hash = "sha256:9cf097a7d18cbadddeaec9fef74b258d70b6cb8d0c77f524baab18bffc7d7be9"}, @@ -2336,14 +2251,13 @@ test = ["coveralls", "flake8", "pytest", "pytest-cov", "pytest-flake8"] [[package]] name = "pytz" -version = "2024.1" +version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["main"] files = [ - {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, - {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] [[package]] @@ -2352,7 +2266,6 @@ version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" -groups = ["main", "dev"] files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, @@ -2409,91 +2322,105 @@ files = [ [[package]] name = "regex" -version = "2024.5.15" +version = "2024.11.6" description = "Alternative regular expression module, to replace re." 
optional = false python-versions = ">=3.8" -groups = ["main", "dev"] -files = [ - {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, - {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, - {file = "regex-2024.5.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0bd000c6e266927cb7a1bc39d55be95c4b4f65c5be53e659537537e019232b1"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eaa7ddaf517aa095fa8da0b5015c44d03da83f5bd49c87961e3c997daed0de7"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba68168daedb2c0bab7fd7e00ced5ba90aebf91024dea3c88ad5063c2a562cca"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e8d717bca3a6e2064fc3a08df5cbe366369f4b052dcd21b7416e6d71620dca1"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1337b7dbef9b2f71121cdbf1e97e40de33ff114801263b275aafd75303bd62b5"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9ebd0a36102fcad2f03696e8af4ae682793a5d30b46c647eaf280d6cfb32796"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9efa1a32ad3a3ea112224897cdaeb6aa00381627f567179c0314f7b65d354c62"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1595f2d10dff3d805e054ebdc41c124753631b6a471b976963c7b28543cf13b0"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b802512f3e1f480f41ab5f2cfc0e2f761f08a1f41092d6718868082fc0d27143"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:a0981022dccabca811e8171f913de05720590c915b033b7e601f35ce4ea7019f"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:19068a6a79cf99a19ccefa44610491e9ca02c2be3305c7760d3831d38a467a6f"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b5269484f6126eee5e687785e83c6b60aad7663dafe842b34691157e5083e53"}, - {file = "regex-2024.5.15-cp310-cp310-win32.whl", hash = "sha256:ada150c5adfa8fbcbf321c30c751dc67d2f12f15bd183ffe4ec7cde351d945b3"}, - {file = "regex-2024.5.15-cp310-cp310-win_amd64.whl", hash = "sha256:ac394ff680fc46b97487941f5e6ae49a9f30ea41c6c6804832063f14b2a5a145"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f5b1dff3ad008dccf18e652283f5e5339d70bf8ba7c98bf848ac33db10f7bc7a"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6a2b494a76983df8e3d3feea9b9ffdd558b247e60b92f877f93a1ff43d26656"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a32b96f15c8ab2e7d27655969a23895eb799de3665fa94349f3b2fbfd547236f"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10002e86e6068d9e1c91eae8295ef690f02f913c57db120b58fdd35a6bb1af35"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec54d5afa89c19c6dd8541a133be51ee1017a38b412b1321ccb8d6ddbeb4cf7d"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10e4ce0dca9ae7a66e6089bb29355d4432caed736acae36fef0fdd7879f0b0cb"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e507ff1e74373c4d3038195fdd2af30d297b4f0950eeda6f515ae3d84a1770f"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f059a4d795e646e1c37665b9d06062c62d0e8cc3c511fe01315973a6542e40"}, - {file = 
"regex-2024.5.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0721931ad5fe0dda45d07f9820b90b2148ccdd8e45bb9e9b42a146cb4f695649"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:833616ddc75ad595dee848ad984d067f2f31be645d603e4d158bba656bbf516c"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:287eb7f54fc81546346207c533ad3c2c51a8d61075127d7f6d79aaf96cdee890"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:19dfb1c504781a136a80ecd1fff9f16dddf5bb43cec6871778c8a907a085bb3d"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:119af6e56dce35e8dfb5222573b50c89e5508d94d55713c75126b753f834de68"}, - {file = "regex-2024.5.15-cp311-cp311-win32.whl", hash = "sha256:1c1c174d6ec38d6c8a7504087358ce9213d4332f6293a94fbf5249992ba54efa"}, - {file = "regex-2024.5.15-cp311-cp311-win_amd64.whl", hash = "sha256:9e717956dcfd656f5055cc70996ee2cc82ac5149517fc8e1b60261b907740201"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:632b01153e5248c134007209b5c6348a544ce96c46005d8456de1d552455b014"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e64198f6b856d48192bf921421fdd8ad8eb35e179086e99e99f711957ffedd6e"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68811ab14087b2f6e0fc0c2bae9ad689ea3584cad6917fc57be6a48bbd012c49"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ec0c2fea1e886a19c3bee0cd19d862b3aa75dcdfb42ebe8ed30708df64687a"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0c0c0003c10f54a591d220997dd27d953cd9ccc1a7294b40a4be5312be8797b"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2431b9e263af1953c55abbd3e2efca67ca80a3de8a0437cb58e2421f8184717a"}, - {file = 
"regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a605586358893b483976cffc1723fb0f83e526e8f14c6e6614e75919d9862cf"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391d7f7f1e409d192dba8bcd42d3e4cf9e598f3979cdaed6ab11288da88cb9f2"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ff11639a8d98969c863d4617595eb5425fd12f7c5ef6621a4b74b71ed8726d5"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4eee78a04e6c67e8391edd4dad3279828dd66ac4b79570ec998e2155d2e59fd5"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8fe45aa3f4aa57faabbc9cb46a93363edd6197cbc43523daea044e9ff2fea83e"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d0a3d8d6acf0c78a1fff0e210d224b821081330b8524e3e2bc5a68ef6ab5803d"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c486b4106066d502495b3025a0a7251bf37ea9540433940a23419461ab9f2a80"}, - {file = "regex-2024.5.15-cp312-cp312-win32.whl", hash = "sha256:c49e15eac7c149f3670b3e27f1f28a2c1ddeccd3a2812cba953e01be2ab9b5fe"}, - {file = "regex-2024.5.15-cp312-cp312-win_amd64.whl", hash = "sha256:673b5a6da4557b975c6c90198588181029c60793835ce02f497ea817ff647cb2"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:87e2a9c29e672fc65523fb47a90d429b70ef72b901b4e4b1bd42387caf0d6835"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c3bea0ba8b73b71b37ac833a7f3fd53825924165da6a924aec78c13032f20850"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfc4f82cabe54f1e7f206fd3d30fda143f84a63fe7d64a81558d6e5f2e5aaba9"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5bb9425fe881d578aeca0b2b4b3d314ec88738706f66f219c194d67179337cb"}, - {file = 
"regex-2024.5.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64c65783e96e563103d641760664125e91bd85d8e49566ee560ded4da0d3e704"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf2430df4148b08fb4324b848672514b1385ae3807651f3567871f130a728cc3"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5397de3219a8b08ae9540c48f602996aa6b0b65d5a61683e233af8605c42b0f2"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:455705d34b4154a80ead722f4f185b04c4237e8e8e33f265cd0798d0e44825fa"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2b6f1b3bb6f640c1a92be3bbfbcb18657b125b99ecf141fb3310b5282c7d4ed"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3ad070b823ca5890cab606c940522d05d3d22395d432f4aaaf9d5b1653e47ced"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5b5467acbfc153847d5adb21e21e29847bcb5870e65c94c9206d20eb4e99a384"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e6662686aeb633ad65be2a42b4cb00178b3fbf7b91878f9446075c404ada552f"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2b4c884767504c0e2401babe8b5b7aea9148680d2e157fa28f01529d1f7fcf67"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3cd7874d57f13bf70078f1ff02b8b0aa48d5b9ed25fc48547516c6aba36f5741"}, - {file = "regex-2024.5.15-cp38-cp38-win32.whl", hash = "sha256:e4682f5ba31f475d58884045c1a97a860a007d44938c4c0895f41d64481edbc9"}, - {file = "regex-2024.5.15-cp38-cp38-win_amd64.whl", hash = "sha256:d99ceffa25ac45d150e30bd9ed14ec6039f2aad0ffa6bb87a5936f5782fc1569"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:13cdaf31bed30a1e1c2453ef6015aa0983e1366fad2667657dbcac7b02f67133"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cac27dcaa821ca271855a32188aa61d12decb6fe45ffe3e722401fe61e323cd1"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7dbe2467273b875ea2de38ded4eba86cbcbc9a1a6d0aa11dcf7bd2e67859c435"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f18a9a3513a99c4bef0e3efd4c4a5b11228b48aa80743be822b71e132ae4f5"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d347a741ea871c2e278fde6c48f85136c96b8659b632fb57a7d1ce1872547600"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1878b8301ed011704aea4c806a3cadbd76f84dece1ec09cc9e4dc934cfa5d4da"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4babf07ad476aaf7830d77000874d7611704a7fcf68c9c2ad151f5d94ae4bfc4"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35cb514e137cb3488bce23352af3e12fb0dbedd1ee6e60da053c69fb1b29cc6c"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdd09d47c0b2efee9378679f8510ee6955d329424c659ab3c5e3a6edea696294"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:72d7a99cd6b8f958e85fc6ca5b37c4303294954eac1376535b03c2a43eb72629"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a094801d379ab20c2135529948cb84d417a2169b9bdceda2a36f5f10977ebc16"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c0c18345010870e58238790a6779a1219b4d97bd2e77e1140e8ee5d14df071aa"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = 
"sha256:16093f563098448ff6b1fa68170e4acbef94e6b6a4e25e10eae8598bb1694b5d"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e38a7d4e8f633a33b4c7350fbd8bad3b70bf81439ac67ac38916c4a86b465456"}, - {file = "regex-2024.5.15-cp39-cp39-win32.whl", hash = "sha256:71a455a3c584a88f654b64feccc1e25876066c4f5ef26cd6dd711308aa538694"}, - {file = "regex-2024.5.15-cp39-cp39-win_amd64.whl", hash = "sha256:cab12877a9bdafde5500206d1020a584355a97884dfd388af3699e9137bf7388"}, - {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, +files = [ + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = 
"regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", 
hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = 
"regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, ] [[package]] @@ -2502,7 +2429,6 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2521,130 +2447,43 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "s3transfer" -version = "0.10.2" +version = "0.12.0" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, - {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, + {file = "s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:35b314d7d82865756edab59f7baebc6b477189e6ab4c53050e28c1de4d9cce18"}, + {file = "s3transfer-0.12.0.tar.gz", hash = "sha256:8ac58bc1989a3fdb7c7f3ee0918a66b160d038a147c7b5db1500930a607e9a1c"}, ] [package.dependencies] -botocore = ">=1.33.2,<2.0a.0" +botocore = ">=1.37.4,<2.0a.0" [package.extras] -crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] +crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] [[package]] name = "safetensors" -version = "0.4.3" +version = "0.5.3" description = "" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] -files = [ - {file = "safetensors-0.4.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dcf5705cab159ce0130cd56057f5f3425023c407e170bca60b4868048bae64fd"}, - {file = "safetensors-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bb4f8c5d0358a31e9a08daeebb68f5e161cdd4018855426d3f0c23bb51087055"}, - {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a5319ef409e7f88686a46607cbc3c428271069d8b770076feaf913664a07ac"}, - {file = 
"safetensors-0.4.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb9c65bd82f9ef3ce4970dc19ee86be5f6f93d032159acf35e663c6bea02b237"}, - {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edb5698a7bc282089f64c96c477846950358a46ede85a1c040e0230344fdde10"}, - {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efcc860be094b8d19ac61b452ec635c7acb9afa77beb218b1d7784c6d41fe8ad"}, - {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d88b33980222085dd6001ae2cad87c6068e0991d4f5ccf44975d216db3b57376"}, - {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5fc6775529fb9f0ce2266edd3e5d3f10aab068e49f765e11f6f2a63b5367021d"}, - {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9c6ad011c1b4e3acff058d6b090f1da8e55a332fbf84695cf3100c649cc452d1"}, - {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c496c5401c1b9c46d41a7688e8ff5b0310a3b9bae31ce0f0ae870e1ea2b8caf"}, - {file = "safetensors-0.4.3-cp310-none-win32.whl", hash = "sha256:38e2a8666178224a51cca61d3cb4c88704f696eac8f72a49a598a93bbd8a4af9"}, - {file = "safetensors-0.4.3-cp310-none-win_amd64.whl", hash = "sha256:393e6e391467d1b2b829c77e47d726f3b9b93630e6a045b1d1fca67dc78bf632"}, - {file = "safetensors-0.4.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:22f3b5d65e440cec0de8edaa672efa888030802e11c09b3d6203bff60ebff05a"}, - {file = "safetensors-0.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c4fa560ebd4522adddb71dcd25d09bf211b5634003f015a4b815b7647d62ebe"}, - {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9afd5358719f1b2cf425fad638fc3c887997d6782da317096877e5b15b2ce93"}, - {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:d8c5093206ef4b198600ae484230402af6713dab1bd5b8e231905d754022bec7"}, - {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0b2104df1579d6ba9052c0ae0e3137c9698b2d85b0645507e6fd1813b70931a"}, - {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8cf18888606dad030455d18f6c381720e57fc6a4170ee1966adb7ebc98d4d6a3"}, - {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0bf4f9d6323d9f86eef5567eabd88f070691cf031d4c0df27a40d3b4aaee755b"}, - {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:585c9ae13a205807b63bef8a37994f30c917ff800ab8a1ca9c9b5d73024f97ee"}, - {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faefeb3b81bdfb4e5a55b9bbdf3d8d8753f65506e1d67d03f5c851a6c87150e9"}, - {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:befdf0167ad626f22f6aac6163477fcefa342224a22f11fdd05abb3995c1783c"}, - {file = "safetensors-0.4.3-cp311-none-win32.whl", hash = "sha256:a7cef55929dcbef24af3eb40bedec35d82c3c2fa46338bb13ecf3c5720af8a61"}, - {file = "safetensors-0.4.3-cp311-none-win_amd64.whl", hash = "sha256:840b7ac0eff5633e1d053cc9db12fdf56b566e9403b4950b2dc85393d9b88d67"}, - {file = "safetensors-0.4.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:22d21760dc6ebae42e9c058d75aa9907d9f35e38f896e3c69ba0e7b213033856"}, - {file = "safetensors-0.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d22c1a10dff3f64d0d68abb8298a3fd88ccff79f408a3e15b3e7f637ef5c980"}, - {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1648568667f820b8c48317c7006221dc40aced1869908c187f493838a1362bc"}, - {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:446e9fe52c051aeab12aac63d1017e0f68a02a92a027b901c4f8e931b24e5397"}, - {file = 
"safetensors-0.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fef5d70683643618244a4f5221053567ca3e77c2531e42ad48ae05fae909f542"}, - {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a1f4430cc0c9d6afa01214a4b3919d0a029637df8e09675ceef1ca3f0dfa0df"}, - {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d603846a8585b9432a0fd415db1d4c57c0f860eb4aea21f92559ff9902bae4d"}, - {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a844cdb5d7cbc22f5f16c7e2a0271170750763c4db08381b7f696dbd2c78a361"}, - {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:88887f69f7a00cf02b954cdc3034ffb383b2303bc0ab481d4716e2da51ddc10e"}, - {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ee463219d9ec6c2be1d331ab13a8e0cd50d2f32240a81d498266d77d07b7e71e"}, - {file = "safetensors-0.4.3-cp312-none-win32.whl", hash = "sha256:d0dd4a1db09db2dba0f94d15addc7e7cd3a7b0d393aa4c7518c39ae7374623c3"}, - {file = "safetensors-0.4.3-cp312-none-win_amd64.whl", hash = "sha256:d14d30c25897b2bf19b6fb5ff7e26cc40006ad53fd4a88244fdf26517d852dd7"}, - {file = "safetensors-0.4.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d1456f814655b224d4bf6e7915c51ce74e389b413be791203092b7ff78c936dd"}, - {file = "safetensors-0.4.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:455d538aa1aae4a8b279344a08136d3f16334247907b18a5c3c7fa88ef0d3c46"}, - {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf476bca34e1340ee3294ef13e2c625833f83d096cfdf69a5342475602004f95"}, - {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02ef3a24face643456020536591fbd3c717c5abaa2737ec428ccbbc86dffa7a4"}, - {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:7de32d0d34b6623bb56ca278f90db081f85fb9c5d327e3c18fd23ac64f465768"}, - {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a0deb16a1d3ea90c244ceb42d2c6c276059616be21a19ac7101aa97da448faf"}, - {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c59d51f182c729f47e841510b70b967b0752039f79f1de23bcdd86462a9b09ee"}, - {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f598b713cc1a4eb31d3b3203557ac308acf21c8f41104cdd74bf640c6e538e3"}, - {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5757e4688f20df083e233b47de43845d1adb7e17b6cf7da5f8444416fc53828d"}, - {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fe746d03ed8d193674a26105e4f0fe6c726f5bb602ffc695b409eaf02f04763d"}, - {file = "safetensors-0.4.3-cp37-none-win32.whl", hash = "sha256:0d5ffc6a80f715c30af253e0e288ad1cd97a3d0086c9c87995e5093ebc075e50"}, - {file = "safetensors-0.4.3-cp37-none-win_amd64.whl", hash = "sha256:a11c374eb63a9c16c5ed146457241182f310902bd2a9c18255781bb832b6748b"}, - {file = "safetensors-0.4.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1e31be7945f66be23f4ec1682bb47faa3df34cb89fc68527de6554d3c4258a4"}, - {file = "safetensors-0.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:03a4447c784917c9bf01d8f2ac5080bc15c41692202cd5f406afba16629e84d6"}, - {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d244bcafeb1bc06d47cfee71727e775bca88a8efda77a13e7306aae3813fa7e4"}, - {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53c4879b9c6bd7cd25d114ee0ef95420e2812e676314300624594940a8d6a91f"}, - {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74707624b81f1b7f2b93f5619d4a9f00934d5948005a03f2c1845ffbfff42212"}, - {file = 
"safetensors-0.4.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d52c958dc210265157573f81d34adf54e255bc2b59ded6218500c9b15a750eb"}, - {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f9568f380f513a60139971169c4a358b8731509cc19112369902eddb33faa4d"}, - {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d9cd8e1560dfc514b6d7859247dc6a86ad2f83151a62c577428d5102d872721"}, - {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:89f9f17b0dacb913ed87d57afbc8aad85ea42c1085bd5de2f20d83d13e9fc4b2"}, - {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1139eb436fd201c133d03c81209d39ac57e129f5e74e34bb9ab60f8d9b726270"}, - {file = "safetensors-0.4.3-cp38-none-win32.whl", hash = "sha256:d9c289f140a9ae4853fc2236a2ffc9a9f2d5eae0cb673167e0f1b8c18c0961ac"}, - {file = "safetensors-0.4.3-cp38-none-win_amd64.whl", hash = "sha256:622afd28968ef3e9786562d352659a37de4481a4070f4ebac883f98c5836563e"}, - {file = "safetensors-0.4.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8651c7299cbd8b4161a36cd6a322fa07d39cd23535b144d02f1c1972d0c62f3c"}, - {file = "safetensors-0.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e375d975159ac534c7161269de24ddcd490df2157b55c1a6eeace6cbb56903f0"}, - {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:084fc436e317f83f7071fc6a62ca1c513b2103db325cd09952914b50f51cf78f"}, - {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:41a727a7f5e6ad9f1db6951adee21bbdadc632363d79dc434876369a17de6ad6"}, - {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7dbbde64b6c534548696808a0e01276d28ea5773bc9a2dfb97a88cd3dffe3df"}, - {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:bbae3b4b9d997971431c346edbfe6e41e98424a097860ee872721e176040a893"}, - {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01e4b22e3284cd866edeabe4f4d896229495da457229408d2e1e4810c5187121"}, - {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dd37306546b58d3043eb044c8103a02792cc024b51d1dd16bd3dd1f334cb3ed"}, - {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8815b5e1dac85fc534a97fd339e12404db557878c090f90442247e87c8aeaea"}, - {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e011cc162503c19f4b1fd63dfcddf73739c7a243a17dac09b78e57a00983ab35"}, - {file = "safetensors-0.4.3-cp39-none-win32.whl", hash = "sha256:01feb3089e5932d7e662eda77c3ecc389f97c0883c4a12b5cfdc32b589a811c3"}, - {file = "safetensors-0.4.3-cp39-none-win_amd64.whl", hash = "sha256:3f9cdca09052f585e62328c1c2923c70f46814715c795be65f0b93f57ec98a02"}, - {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1b89381517891a7bb7d1405d828b2bf5d75528299f8231e9346b8eba092227f9"}, - {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cd6fff9e56df398abc5866b19a32124815b656613c1c5ec0f9350906fd798aac"}, - {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:840caf38d86aa7014fe37ade5d0d84e23dcfbc798b8078015831996ecbc206a3"}, - {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9650713b2cfa9537a2baf7dd9fee458b24a0aaaa6cafcea8bdd5fb2b8efdc34"}, - {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4119532cd10dba04b423e0f86aecb96cfa5a602238c0aa012f70c3a40c44b50"}, - {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e066e8861eef6387b7c772344d1fe1f9a72800e04ee9a54239d460c400c72aab"}, - {file = 
"safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:90964917f5b0fa0fa07e9a051fbef100250c04d150b7026ccbf87a34a54012e0"}, - {file = "safetensors-0.4.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c41e1893d1206aa7054029681778d9a58b3529d4c807002c156d58426c225173"}, - {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae7613a119a71a497d012ccc83775c308b9c1dab454806291427f84397d852fd"}, - {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9bac020faba7f5dc481e881b14b6425265feabb5bfc552551d21189c0eddc3"}, - {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:420a98f593ff9930f5822560d14c395ccbc57342ddff3b463bc0b3d6b1951550"}, - {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f5e6883af9a68c0028f70a4c19d5a6ab6238a379be36ad300a22318316c00cb0"}, - {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:cdd0a3b5da66e7f377474599814dbf5cbf135ff059cc73694de129b58a5e8a2c"}, - {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9bfb92f82574d9e58401d79c70c716985dc049b635fef6eecbb024c79b2c46ad"}, - {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:3615a96dd2dcc30eb66d82bc76cda2565f4f7bfa89fcb0e31ba3cea8a1a9ecbb"}, - {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868ad1b6fc41209ab6bd12f63923e8baeb1a086814cb2e81a65ed3d497e0cf8f"}, - {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7ffba80aa49bd09195145a7fd233a7781173b422eeb995096f2b30591639517"}, - {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0acbe31340ab150423347e5b9cc595867d814244ac14218932a5cf1dd38eb39"}, - {file = 
"safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:19bbdf95de2cf64f25cd614c5236c8b06eb2cfa47cbf64311f4b5d80224623a3"}, - {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b852e47eb08475c2c1bd8131207b405793bfc20d6f45aff893d3baaad449ed14"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5d07cbca5b99babb692d76d8151bec46f461f8ad8daafbfd96b2fca40cadae65"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ab6527a20586d94291c96e00a668fa03f86189b8a9defa2cdd34a1a01acc7d5"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02318f01e332cc23ffb4f6716e05a492c5f18b1d13e343c49265149396284a44"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec4b52ce9a396260eb9731eb6aea41a7320de22ed73a1042c2230af0212758ce"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:018b691383026a2436a22b648873ed11444a364324e7088b99cd2503dd828400"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:309b10dbcab63269ecbf0e2ca10ce59223bb756ca5d431ce9c9eeabd446569da"}, - {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b277482120df46e27a58082df06a15aebda4481e30a1c21eefd0921ae7e03f65"}, - {file = "safetensors-0.4.3.tar.gz", hash = "sha256:2f85fc50c4e07a21e95c24e07460fe6f7e2859d0ce88092838352b798ce711c2"}, +files = [ + {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, + {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, + {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:11bce6164887cd491ca75c2326a113ba934be596e22b28b1742ce27b1d076467"}, + {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4a243be3590bc3301c821da7a18d87224ef35cbd3e5f5727e4e0728b8172411e"}, + {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8bd84b12b1670a6f8e50f01e28156422a2bc07fb16fc4e98bded13039d688a0d"}, + {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:391ac8cab7c829452175f871fcaf414aa1e292b5448bd02620f675a7f3e7abb9"}, + {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cead1fa41fc54b1e61089fa57452e8834f798cb1dc7a09ba3524f1eb08e0317a"}, + {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1077f3e94182d72618357b04b5ced540ceb71c8a813d3319f1aba448e68a770d"}, + {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:799021e78287bac619c7b3f3606730a22da4cda27759ddf55d37c8db7511c74b"}, + {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:df26da01aaac504334644e1b7642fa000bfec820e7cef83aeac4e355e03195ff"}, + {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:32c3ef2d7af8b9f52ff685ed0bc43913cdcde135089ae322ee576de93eae5135"}, + {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:37f1521be045e56fc2b54c606d4455573e717b2d887c579ee1dbba5f868ece04"}, + {file = "safetensors-0.5.3-cp38-abi3-win32.whl", hash = "sha256:cfc0ec0846dcf6763b0ed3d1846ff36008c6e7290683b61616c4b040f6a54ace"}, + {file = "safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11"}, + {file = "safetensors-0.5.3.tar.gz", hash = "sha256:b6b0d6ecacec39a4fdd99cc19f4576f5219ce858e6fd8dbe7609df0b8dc56965"}, ] [package.extras] @@ -2654,7 +2493,7 @@ jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", 
"jaxlib (>=0.3.25)", "safetensors[num mlx = ["mlx (>=0.0.9)"] numpy = ["numpy (>=1.21.6)"] paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] -pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.18.0)"] quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] @@ -2662,33 +2501,41 @@ torch = ["safetensors[numpy]", "torch (>=1.10)"] [[package]] name = "scikit-learn" -version = "1.5.1" +version = "1.6.1" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "scikit_learn-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:781586c414f8cc58e71da4f3d7af311e0505a683e112f2f62919e3019abd3745"}, - {file = "scikit_learn-1.5.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5b213bc29cc30a89a3130393b0e39c847a15d769d6e59539cd86b75d276b1a7"}, - {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff4ba34c2abff5ec59c803ed1d97d61b036f659a17f55be102679e88f926fac"}, - {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:161808750c267b77b4a9603cf9c93579c7a74ba8486b1336034c2f1579546d21"}, - {file = "scikit_learn-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:10e49170691514a94bb2e03787aa921b82dbc507a4ea1f20fd95557862c98dc1"}, - {file = "scikit_learn-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:154297ee43c0b83af12464adeab378dee2d0a700ccd03979e2b821e7dd7cc1c2"}, - {file = "scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b5e865e9bd59396220de49cb4a57b17016256637c61b4c5cc81aaf16bc123bbe"}, - 
{file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909144d50f367a513cee6090873ae582dba019cb3fca063b38054fa42704c3a4"}, - {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689b6f74b2c880276e365fe84fe4f1befd6a774f016339c65655eaff12e10cbf"}, - {file = "scikit_learn-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:9a07f90846313a7639af6a019d849ff72baadfa4c74c778821ae0fad07b7275b"}, - {file = "scikit_learn-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5944ce1faada31c55fb2ba20a5346b88e36811aab504ccafb9f0339e9f780395"}, - {file = "scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0828673c5b520e879f2af6a9e99eee0eefea69a2188be1ca68a6121b809055c1"}, - {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508907e5f81390e16d754e8815f7497e52139162fd69c4fdbd2dfa5d6cc88915"}, - {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97625f217c5c0c5d0505fa2af28ae424bd37949bb2f16ace3ff5f2f81fb4498b"}, - {file = "scikit_learn-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:da3f404e9e284d2b0a157e1b56b6566a34eb2798205cba35a211df3296ab7a74"}, - {file = "scikit_learn-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:88e0672c7ac21eb149d409c74cc29f1d611d5158175846e7a9c2427bd12b3956"}, - {file = "scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7b073a27797a283187a4ef4ee149959defc350b46cbf63a84d8514fe16b69855"}, - {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b59e3e62d2be870e5c74af4e793293753565c7383ae82943b83383fdcf5cc5c1"}, - {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd8d3a19d4bd6dc5a7d4f358c8c3a60934dc058f363c34c0ac1e9e12a31421d"}, - {file = "scikit_learn-1.5.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:5f57428de0c900a98389c4a433d4a3cf89de979b3aa24d1c1d251802aa15e44d"}, - {file = "scikit_learn-1.5.1.tar.gz", hash = "sha256:0ea5d40c0e3951df445721927448755d3fe1d80833b0b7308ebff5d2a45e6414"}, +files = [ + {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, + {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, + {file = "scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8634c4bd21a2a813e0a7e3900464e6d593162a29dd35d25bdf0103b3fce60ed5"}, + {file = "scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775da975a471c4f6f467725dff0ced5c7ac7bda5e9316b260225b48475279a1b"}, + {file = "scikit_learn-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:8a600c31592bd7dab31e1c61b9bbd6dea1b3433e67d264d17ce1017dbdce8002"}, + {file = "scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33"}, + {file = "scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d"}, + {file = "scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2"}, + {file = "scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8"}, + {file = "scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415"}, + {file = "scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b"}, + {file = 
"scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2"}, + {file = "scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f"}, + {file = "scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86"}, + {file = "scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52"}, + {file = "scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322"}, + {file = "scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1"}, + {file = "scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348"}, + {file = "scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97"}, + {file = "scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = 
"sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f"}, + {file = "scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6849dd3234e87f55dce1db34c89a810b489ead832aaf4d4550b7ea85628be6c1"}, + {file = "scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e"}, + {file = "scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44a17798172df1d3c1065e8fcf9019183f06c87609b49a124ebdf57ae6cb0107"}, + {file = "scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b7a3b86e411e4bce21186e1c180d792f3d99223dcfa3b4f597ecc92fa1a422"}, + {file = "scikit_learn-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7a73d457070e3318e32bdb3aa79a8d990474f19035464dfd8bede2883ab5dc3b"}, + {file = "scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e"}, ] [package.dependencies] @@ -2700,11 +2547,11 @@ threadpoolctl = ">=3.1.0" [package.extras] benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] -docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image 
(>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.17.1)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)", "towncrier (>=24.8.0)"] examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] maintenance = ["conda-lock (==2.5.6)"] -tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.5.1)", "scikit-image (>=0.17.2)"] [[package]] name = "scipy" @@ -2712,7 +2559,6 @@ version = "1.13.1" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, @@ -2755,7 +2601,6 @@ version = "1.5.11" description = "sentence segmentation and word tokenization tools" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "segtok-1.5.11-py3-none-any.whl", hash = "sha256:910616b76198c3141b2772df530270d3b706e42ae69a5b30ef115c7bd5d1501a"}, {file = "segtok-1.5.11.tar.gz", 
hash = "sha256:8ab2dd44245bcbfec25b575dc4618473bbdf2af8c2649698cd5a370f42f3db23"}, @@ -2766,14 +2611,13 @@ regex = "*" [[package]] name = "semver" -version = "3.0.2" +version = "3.0.4" description = "Python helper for Semantic Versioning (https://semver.org)" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ - {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, - {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, + {file = "semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746"}, + {file = "semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602"}, ] [[package]] @@ -2782,7 +2626,6 @@ version = "0.2.0" description = "SentencePiece python wrapper" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"}, {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"}, @@ -2845,7 +2688,6 @@ version = "70.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, @@ -2853,30 +2695,28 @@ files = [ [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", 
"sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov ; platform_python_implementation != \"PyPy\"", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "six" -version = "1.16.0" +version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["main", "dev"] +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, + {file = "six-1.17.0-py2.py3-none-any.whl", hash = 
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] [[package]] name = "smart-open" -version = "7.0.4" +version = "7.1.0" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" optional = false python-versions = "<4.0,>=3.7" -groups = ["dev"] files = [ - {file = "smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47"}, - {file = "smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524"}, + {file = "smart_open-7.1.0-py3-none-any.whl", hash = "sha256:4b8489bb6058196258bafe901730c7db0dcf4f083f316e97269c66f45502055b"}, + {file = "smart_open-7.1.0.tar.gz", hash = "sha256:a4f09f84f0f6d3637c6543aca7b5487438877a21360e7368ccf1f704789752ba"}, ] [package.dependencies] @@ -2889,20 +2729,19 @@ gcs = ["google-cloud-storage (>=2.6.0)"] http = ["requests"] s3 = ["boto3"] ssh = ["paramiko"] -test = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "paramiko", "pytest", "pytest-rerunfailures", "requests", "responses", "zstandard"] +test = ["awscli", "azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "numpy", "paramiko", "pyopenssl", "pytest", "pytest-benchmark", "pytest-rerunfailures", "requests", "responses", "zstandard"] webhdfs = ["requests"] zst = ["zstandard"] [[package]] name = "soupsieve" -version = "2.5" +version = "2.7" description = "A modern CSS selector implementation for Beautiful Soup." 
optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ - {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, - {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, + {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, + {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, ] [[package]] @@ -2911,7 +2750,6 @@ version = "2.1.0" description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe." optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c"}, ] @@ -2922,7 +2760,6 @@ version = "1.12.1" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"}, {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"}, @@ -2937,7 +2774,6 @@ version = "0.9.0" description = "Pretty-print tabular data" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -2952,8 +2788,6 @@ version = "2021.12.0" description = "IntelĀ® oneAPI Threading Building Blocks (oneTBB)" optional = false python-versions = "*" -groups = ["main", "dev"] -markers = "platform_system == \"Windows\"" files = [ {file = "tbb-2021.12.0-py2.py3-none-manylinux1_i686.whl", hash = 
"sha256:f2cc9a7f8ababaa506cbff796ce97c3bf91062ba521e15054394f773375d81d8"}, {file = "tbb-2021.12.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:a925e9a7c77d3a46ae31c34b0bb7f801c4118e857d137b68f68a8e458fcf2bd7"}, @@ -2963,14 +2797,13 @@ files = [ [[package]] name = "threadpoolctl" -version = "3.5.0" +version = "3.6.0" description = "threadpoolctl" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, - {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, + {file = "threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb"}, + {file = "threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e"}, ] [[package]] @@ -2979,7 +2812,6 @@ version = "0.15.2" description = "" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] files = [ {file = "tokenizers-0.15.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:52f6130c9cbf70544287575a985bf44ae1bda2da7e8c24e97716080593638012"}, {file = "tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:054c1cc9c6d68f7ffa4e810b3d5131e0ba511b6e4be34157aa08ee54c2f8d9ee"}, @@ -3107,8 +2939,6 @@ version = "2.0.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -3120,7 +2950,6 @@ version = "2.3.0+cpu" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false 
python-versions = ">=3.8.0" -groups = ["main", "dev"] files = [ {file = "torch-2.3.0+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:e3c220702d82c7596924150e0499fbbffcf62a88a59adc860fa357cd8dc1c302"}, {file = "torch-2.3.0+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:ab0c05525195b8fecdf2ea75968ed32ccd87dff16381b6e13249babb4a9596ff"}, @@ -3158,7 +2987,6 @@ version = "1.4.0.post0" description = "PyTorch native Metrics" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "torchmetrics-1.4.0.post0-py3-none-any.whl", hash = "sha256:ab234216598e3fbd8d62ee4541a0e74e7e8fc935d099683af5b8da50f745b3c8"}, {file = "torchmetrics-1.4.0.post0.tar.gz", hash = "sha256:ab9bcfe80e65dbabbddb6cecd9be21f1f1d5207bb74051ef95260740f2762358"}, @@ -3188,7 +3016,6 @@ version = "4.66.4" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] files = [ {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, @@ -3205,18 +3032,16 @@ telegram = ["requests"] [[package]] name = "transformer-smaller-training-vocab" -version = "0.4.0" +version = "0.4.1" description = "Temporary remove unused tokens during training to save ram and speed." 
optional = false -python-versions = "<4.0,>=3.8" -groups = ["dev"] +python-versions = "<4.0,>=3.9" files = [ - {file = "transformer_smaller_training_vocab-0.4.0-py3-none-any.whl", hash = "sha256:01cb3d8f4818121172e1591a06c3149bf49bc18d6f6f269eb42d2c4ed155cfcc"}, - {file = "transformer_smaller_training_vocab-0.4.0.tar.gz", hash = "sha256:d7360ac084786f66f99ef16d621f34acbb0dce6d9a624525d1f7dc8b6c3a49f7"}, + {file = "transformer_smaller_training_vocab-0.4.1-py3-none-any.whl", hash = "sha256:e19c4c95b594569710b1235f00ff69ddad6401a15554e59657f768dde885bb3a"}, + {file = "transformer_smaller_training_vocab-0.4.1.tar.gz", hash = "sha256:834a804a712ba23cbe410e390791db70d7812b0d7d3bfe1de3efa7b89a85a06c"}, ] [package.dependencies] -numpy = {version = ">=1.21.0,<2.0.0", markers = "python_version >= \"3.9\""} torch = ">=1.8.0,<2.0.1 || >2.0.1,<3.0.0" transformers = {version = ">=4.1,<5.0", extras = ["sentencepiece", "torch"]} @@ -3226,7 +3051,6 @@ version = "4.36.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" -groups = ["main", "dev"] files = [ {file = "transformers-4.36.2-py3-none-any.whl", hash = "sha256:462066c4f74ee52516f12890dcc9ec71d1a5e97998db621668455117a54330f6"}, {file = "transformers-4.36.2.tar.gz", hash = "sha256:d8068e897e47793281501e547d2bbdfc5b8556409c2cb6c3d9e2ca77d4c0b4ec"}, @@ -3299,7 +3123,6 @@ version = "4.12.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, @@ -3307,14 +3130,13 @@ files = [ [[package]] name = "tzdata" -version = "2024.1" +version = "2025.2" description = "Provider of IANA time zone data" optional = 
false python-versions = ">=2" -groups = ["main"] files = [ - {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, - {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] [[package]] @@ -3323,15 +3145,14 @@ version = "1.26.19" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -groups = ["main", "dev"] files = [ {file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"}, {file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"}, ] [package.extras] -brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -3340,7 +3161,6 @@ version = "20.26.2" description = 
"Virtual Python Environment builder" optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "virtualenv-20.26.2-py3-none-any.whl", hash = "sha256:a624db5e94f01ad993d476b9ee5346fdf7b9de43ccaee0e0197012dc838a0e9b"}, {file = "virtualenv-20.26.2.tar.gz", hash = "sha256:82bf0f4eebbb78d36ddaee0283d43fe5736b53880b8a8cdcd37390a07ac3741c"}, @@ -3353,7 +3173,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "wcwidth" @@ -3361,7 +3181,6 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3369,14 +3188,12 @@ files = [ [[package]] name = "wikipedia-api" -version = "0.6.0" +version = "0.8.1" description = "Python Wrapper for Wikipedia" optional = false python-versions = "*" -groups = 
["dev"] files = [ - {file = "Wikipedia-API-0.6.0.tar.gz", hash = "sha256:61e94921cca9ec68e92aa5f258261d6a88b7baa960f9acfcb0c9c2c525dcb3ff"}, - {file = "Wikipedia_API-0.6.0-py3-none-any.whl", hash = "sha256:6dfd6b3b680e342a3843fe954049c5784c1a67fadc0060f9d1696d1d0e41ecfb"}, + {file = "wikipedia_api-0.8.1.tar.gz", hash = "sha256:b31e93b3f5407c1a1ba413ed7326a05379a3c270df6cf6a211aca67a14c5658b"}, ] [package.dependencies] @@ -3384,82 +3201,90 @@ requests = "*" [[package]] name = "wrapt" -version = "1.16.0" +version = "1.17.2" description = "Module for decorators, wrappers and monkey patching." optional = false -python-versions = ">=3.6" -groups = ["dev"] -files = [ - {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, - {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, - {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, - {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, - {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, - {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, - {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, - {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, - {file = 
"wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, - {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, - {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, - {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, - {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, - {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, - {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, - {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, - {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, - {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, - {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, - {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, - {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, - {file = 
"wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, - {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, - {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, - {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, - {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, - {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, - {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, - {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, - {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, - {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, - {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, - {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, - {file = 
"wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, - {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, - {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, - {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, - {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, - {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, - {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, - {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, - {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, - {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, - {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, - {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, - {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, - {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, - {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, - {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, - {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, - {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, - {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, - {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, - {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, - {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, - {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, - {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, - {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, - {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, - {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, - {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, - {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, - {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, - {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, - {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, - {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, - {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, - {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, - {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, - {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +python-versions = ">=3.8" +files = [ + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"}, + {file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"}, + {file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"}, + {file = 
"wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"}, + {file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"}, + {file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"}, + {file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"}, + {file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"}, 
+ {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"}, + {file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"}, + {file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = 
"sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"}, + {file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"}, + {file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"}, + {file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = 
"sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"}, + {file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"}, + {file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"}, + {file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"}, + {file = "wrapt-1.17.2-py3-none-any.whl", hash = 
"sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"}, + {file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"}, ] [[package]] @@ -3468,7 +3293,6 @@ version = "1.9.4" description = "Yet another URL library" optional = false python-versions = ">=3.7" -groups = ["main"] files = [ {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, @@ -3568,22 +3392,24 @@ multidict = ">=4.0" [[package]] name = "zipp" -version = "3.19.2" +version = "3.21.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version == \"3.9\"" +python-versions = ">=3.9" files = [ - {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, - {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, + {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, + {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, ] [package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] +enabler = 
["pytest-enabler (>=2.2)"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +type = ["pytest-mypy"] [metadata] -lock-version = "2.1" +lock-version = "2.0" python-versions = "^3.9" -content-hash = "360e7128e1296a81a16070db02a7145bf9e807f3795bcfe56f4e1e453c976f6b" +content-hash = "43138cb0ddfceb279a67580baf1e70c751a5c4904c398dbbd5808584f7ada8ab" diff --git a/pyproject.toml b/pyproject.toml index 4c5e84c4f..09cedf7b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,11 @@ networkx = "^3.0.0" [tool.poetry.group.dev] optional = true +[[tool.poetry.source]] +name = "pytorch" +url = "https://download.pytorch.org/whl/cpu" +priority = "explicit" + [[tool.poetry.source]] name = "pre-release" url = "https://test.pypi.org/simple/" From 057bd39d78cea687d51778a252e2c5166240be46 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sun, 27 Apr 2025 14:21:04 +0200 Subject: [PATCH 03/11] add annotation tests from pytorch-ie --- tests/test_annotations.py | 350 +++++++++++++++++++++++++++++++++++++- 1 file changed, 348 insertions(+), 2 deletions(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 5ea2a0b85..b41d4bc5d 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -1,10 +1,23 @@ +import dataclasses import json +import re from typing import Dict, Optional import pytest -from pie_core import Annotation +from pie_core import Annotation, AnnotationLayer, annotation_field -from pie_modules.annotations import LabeledMultiSpan +from pie_modules.annotations import ( + BinaryRelation, + Label, + LabeledMultiSpan, + LabeledSpan, + MultiLabel, + MultiLabeledBinaryRelation, + MultiLabeledSpan, + NaryRelation, + Span, +) +from pie_modules.documents import TextBasedDocument def _test_annotation_reconstruction( @@ -17,6 +30,163 @@ def _test_annotation_reconstruction( assert annotation_reconstructed == annotation +def test_label(): + 
label1 = Label(label="label1") + assert label1.label == "label1" + assert label1.score == pytest.approx(1.0) + assert label1.resolve() == "label1" + + label2 = Label(label="label2", score=0.5) + assert label2.label == "label2" + assert label2.score == pytest.approx(0.5) + + assert label2.asdict() == { + "_id": label2._id, + "label": "label2", + "score": 0.5, + } + + _test_annotation_reconstruction(label2) + + +def test_multilabel(): + multilabel1 = MultiLabel(label=("label1", "label2")) + assert multilabel1.label == ("label1", "label2") + assert multilabel1.score == pytest.approx((1.0, 1.0)) + assert multilabel1.resolve() == ("label1", "label2") + + multilabel2 = MultiLabel(label=("label3", "label4"), score=(0.4, 0.5)) + assert multilabel2.label == ("label3", "label4") + assert multilabel2.score == pytest.approx((0.4, 0.5)) + + assert multilabel2.asdict() == { + "_id": multilabel2._id, + "label": ("label3", "label4"), + "score": (0.4, 0.5), + } + + _test_annotation_reconstruction(multilabel2) + + with pytest.raises( + ValueError, match=re.escape("Number of labels (2) and scores (3) must be equal.") + ): + MultiLabel(label=("label5", "label6"), score=(0.1, 0.2, 0.3)) + + +def test_span(): + span = Span(start=1, end=2) + assert span.start == 1 + assert span.end == 2 + + assert span.asdict() == { + "_id": span._id, + "start": 1, + "end": 2, + } + + _test_annotation_reconstruction(span) + + with pytest.raises(ValueError) as excinfo: + span.resolve() + assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." 
+ + @dataclasses.dataclass + class TestDocument(TextBasedDocument): + spans: AnnotationLayer[Span] = annotation_field(target="text") + + doc = TestDocument(text="Hello, world!") + span = Span(start=7, end=12) + doc.spans.append(span) + assert span.resolve() == "world" + + +def test_labeled_span(): + labeled_span1 = LabeledSpan(start=1, end=2, label="label1") + assert labeled_span1.start == 1 + assert labeled_span1.end == 2 + assert labeled_span1.label == "label1" + assert labeled_span1.score == pytest.approx(1.0) + + labeled_span2 = LabeledSpan(start=3, end=4, label="label2", score=0.5) + assert labeled_span2.start == 3 + assert labeled_span2.end == 4 + assert labeled_span2.label == "label2" + assert labeled_span2.score == pytest.approx(0.5) + + assert labeled_span2.asdict() == { + "_id": labeled_span2._id, + "start": 3, + "end": 4, + "label": "label2", + "score": 0.5, + } + + _test_annotation_reconstruction(labeled_span2) + + with pytest.raises(ValueError) as excinfo: + labeled_span1.resolve() + assert ( + str(excinfo.value) + == "LabeledSpan(start=1, end=2, label='label1', score=1.0) is not attached to a target." 
+ ) + + @dataclasses.dataclass + class TestDocument(TextBasedDocument): + spans: AnnotationLayer[LabeledSpan] = annotation_field(target="text") + + doc = TestDocument(text="Hello, world!") + labeled_span = LabeledSpan(start=7, end=12, label="LOC") + doc.spans.append(labeled_span) + assert labeled_span.resolve() == ("LOC", "world") + + +def test_multilabeled_span(): + multilabeled_span1 = MultiLabeledSpan(start=1, end=2, label=("label1", "label2")) + assert multilabeled_span1.start == 1 + assert multilabeled_span1.end == 2 + assert multilabeled_span1.label == ("label1", "label2") + assert multilabeled_span1.score == pytest.approx((1.0, 1.0)) + + multilabeled_span2 = MultiLabeledSpan( + start=3, end=4, label=("label3", "label4"), score=(0.4, 0.5) + ) + assert multilabeled_span2.start == 3 + assert multilabeled_span2.end == 4 + assert multilabeled_span2.label == ("label3", "label4") + assert multilabeled_span2.score == pytest.approx((0.4, 0.5)) + + assert multilabeled_span2.asdict() == { + "_id": multilabeled_span2._id, + "start": 3, + "end": 4, + "label": ("label3", "label4"), + "score": (0.4, 0.5), + } + + _test_annotation_reconstruction(multilabeled_span2) + + with pytest.raises( + ValueError, match=re.escape("Number of labels (2) and scores (3) must be equal.") + ): + MultiLabeledSpan(start=5, end=6, label=("label5", "label6"), score=(0.1, 0.2, 0.3)) + + with pytest.raises(ValueError) as excinfo: + multilabeled_span1.resolve() + assert ( + str(excinfo.value) + == "MultiLabeledSpan(start=1, end=2, label=('label1', 'label2'), score=(1.0, 1.0)) is not attached to a target." 
+ ) + + @dataclasses.dataclass + class TestDocument(TextBasedDocument): + spans: AnnotationLayer[MultiLabeledSpan] = annotation_field(target="text") + + doc = TestDocument(text="Hello, world!") + multilabeled_span = MultiLabeledSpan(start=7, end=12, label=("LOC", "ORG")) + doc.spans.append(multilabeled_span) + assert multilabeled_span.resolve() == (("LOC", "ORG"), "world") + + def test_labeled_multi_span(): labeled_multi_span1 = LabeledMultiSpan(slices=((1, 2), (3, 4)), label="label1") assert labeled_multi_span1.slices == ((1, 2), (3, 4)) @@ -40,3 +210,179 @@ def test_labeled_multi_span(): } _test_annotation_reconstruction(labeled_multi_span2) + + +def test_binary_relation(): + head = Span(start=1, end=2) + tail = Span(start=3, end=4) + + binary_relation1 = BinaryRelation(head=head, tail=tail, label="label1") + assert binary_relation1.head == head + assert binary_relation1.tail == tail + assert binary_relation1.label == "label1" + assert binary_relation1.score == pytest.approx(1.0) + + binary_relation2 = BinaryRelation(head=head, tail=tail, label="label2", score=0.5) + assert binary_relation2.head == head + assert binary_relation2.tail == tail + assert binary_relation2.label == "label2" + assert binary_relation2.score == pytest.approx(0.5) + + assert binary_relation2.asdict() == { + "_id": binary_relation2._id, + "head": head._id, + "tail": tail._id, + "label": "label2", + "score": 0.5, + } + + annotation_store = { + head._id: head, + tail._id: tail, + } + _test_annotation_reconstruction(binary_relation2, annotation_store=annotation_store) + + with pytest.raises( + ValueError, + match=re.escape("Unable to resolve the annotation id without annotation_store."), + ): + BinaryRelation.fromdict(binary_relation2.asdict()) + + with pytest.raises(ValueError) as excinfo: + binary_relation1.resolve() + assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." 
+ + @dataclasses.dataclass + class TestDocument(TextBasedDocument): + spans: AnnotationLayer[Span] = annotation_field(target="text") + relations: AnnotationLayer[BinaryRelation] = annotation_field(target="spans") + + doc = TestDocument(text="Hello, world!") + head = Span(start=0, end=5) + tail = Span(start=7, end=12) + doc.spans.extend([head, tail]) + relation = BinaryRelation(head=head, tail=tail, label="LABEL") + doc.relations.append(relation) + assert relation.resolve() == ("LABEL", ("Hello", "world")) + + +def test_multilabeled_binary_relation(): + head = Span(start=1, end=2) + tail = Span(start=3, end=4) + + binary_relation1 = MultiLabeledBinaryRelation(head=head, tail=tail, label=("label1", "label2")) + assert binary_relation1.head == head + assert binary_relation1.tail == tail + assert binary_relation1.label == ("label1", "label2") + assert binary_relation1.score == pytest.approx((1.0, 1.0)) + + binary_relation2 = MultiLabeledBinaryRelation( + head=head, tail=tail, label=("label3", "label4"), score=(0.4, 0.5) + ) + assert binary_relation2.head == head + assert binary_relation2.tail == tail + assert binary_relation2.label == ("label3", "label4") + assert binary_relation2.score == pytest.approx((0.4, 0.5)) + + assert binary_relation2.asdict() == { + "_id": binary_relation2._id, + "head": head._id, + "tail": tail._id, + "label": ("label3", "label4"), + "score": (0.4, 0.5), + } + + annotation_store = { + head._id: head, + tail._id: tail, + } + _test_annotation_reconstruction(binary_relation2, annotation_store=annotation_store) + + with pytest.raises( + ValueError, + match=re.escape("Unable to resolve the annotation id without annotation_store."), + ): + MultiLabeledBinaryRelation.fromdict(binary_relation2.asdict()) + + with pytest.raises( + ValueError, match=re.escape("Number of labels (2) and scores (3) must be equal.") + ): + MultiLabeledBinaryRelation( + head=head, tail=tail, label=("label5", "label6"), score=(0.1, 0.2, 0.3) + ) + + with 
pytest.raises(ValueError) as excinfo: + binary_relation1.resolve() + assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." + + @dataclasses.dataclass + class TestDocument(TextBasedDocument): + spans: AnnotationLayer[Span] = annotation_field(target="text") + relations: AnnotationLayer[MultiLabeledBinaryRelation] = annotation_field(target="spans") + + doc = TestDocument(text="Hello, world!") + head = Span(start=0, end=5) + tail = Span(start=7, end=12) + doc.spans.extend([head, tail]) + relation = MultiLabeledBinaryRelation(head=head, tail=tail, label=("LABEL1", "LABEL2")) + doc.relations.append(relation) + assert relation.resolve() == (("LABEL1", "LABEL2"), ("Hello", "world")) + + +def test_nary_relation(): + arg1 = Span(start=1, end=2) + arg2 = Span(start=3, end=4) + arg3 = Span(start=5, end=6) + + nary_relation1 = NaryRelation( + arguments=(arg1, arg2, arg3), roles=("role1", "role2", "role3"), label="label1" + ) + + assert nary_relation1.arguments == (arg1, arg2, arg3) + assert nary_relation1.roles == ("role1", "role2", "role3") + assert nary_relation1.label == "label1" + assert nary_relation1.score == pytest.approx(1.0) + + assert nary_relation1.asdict() == { + "_id": nary_relation1._id, + "arguments": [arg1._id, arg2._id, arg3._id], + "roles": ("role1", "role2", "role3"), + "label": "label1", + "score": 1.0, + } + + annotation_store = { + arg1._id: arg1, + arg2._id: arg2, + arg3._id: arg3, + } + _test_annotation_reconstruction(nary_relation1, annotation_store=annotation_store) + + with pytest.raises( + ValueError, + match=re.escape("Unable to resolve the annotation id without annotation_store."), + ): + NaryRelation.fromdict(nary_relation1.asdict()) + + with pytest.raises(ValueError) as excinfo: + nary_relation1.resolve() + assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." 
+ + @dataclasses.dataclass + class TestDocument(TextBasedDocument): + spans: AnnotationLayer[Span] = annotation_field(target="text") + relations: AnnotationLayer[NaryRelation] = annotation_field(target="spans") + + doc = TestDocument(text="Hello, world A and B!") + arg1 = Span(start=0, end=5) + arg2 = Span(start=7, end=14) + arg3 = Span(start=19, end=20) + doc.spans.extend([arg1, arg2, arg3]) + relation = NaryRelation( + arguments=(arg1, arg2, arg3), roles=("ARG1", "ARG2", "ARG3"), label="LABEL" + ) + doc.relations.append(relation) + assert relation.resolve() == ( + "LABEL", + (("ARG1", "Hello"), ("ARG2", "world A"), ("ARG3", "B")), + ) From 7dda761cd47912297b3097c4d381ea38930bb4dc Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sun, 27 Apr 2025 14:47:19 +0200 Subject: [PATCH 04/11] remove tokenize_document --- poetry.lock | 2 +- pyproject.toml | 2 - .../document/processing/__init__.py | 1 - .../document/processing/tokenization.py | 124 +----- src/pie_modules/metrics/__init__.py | 1 - .../relation_argument_distance_collector.py | 41 +- .../metrics/span_coverage_collector.py | 47 +-- .../metrics/span_length_collector.py | 42 +- src/pie_modules/metrics/statistics.py | 36 +- src/pie_modules/utils/tokenization.py | 36 -- .../document/processing/test_tokenization.py | 372 ------------------ ...st_relation_argument_distance_collector.py | 104 +---- tests/metrics/test_span_coverage_collector.py | 84 +--- tests/metrics/test_span_length_collector.py | 78 +--- tests/metrics/test_statistics.py | 15 - tests/utils/test_tokenization.py | 55 --- 16 files changed, 14 insertions(+), 1026 deletions(-) delete mode 100644 src/pie_modules/utils/tokenization.py delete mode 100644 tests/utils/test_tokenization.py diff --git a/poetry.lock b/poetry.lock index 77e95ccc0..f07498fe5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3412,4 +3412,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = 
"43138cb0ddfceb279a67580baf1e70c751a5c4904c398dbbd5808584f7ada8ab" +content-hash = "5a228f0d856084e84519582d2fe026d5a808ff9c9144d9058a43ef6bf4c67714" diff --git a/pyproject.toml b/pyproject.toml index 09cedf7b4..ceea5ca8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,6 @@ classifiers = [ python = "^3.9" pie-core = ">=0.1.2,<0.2.0" pandas = ">=2.0.3,<3.0.0" -# required for AutoTokenizer, PreTrainedTokenizer, BatchEncoding -transformers = ">=4.0.0,<5.0.0" [tool.poetry.group.dev.dependencies] pytest = "^7.4.2" diff --git a/src/pie_modules/document/processing/__init__.py b/src/pie_modules/document/processing/__init__.py index 296944c63..28d15ba1a 100644 --- a/src/pie_modules/document/processing/__init__.py +++ b/src/pie_modules/document/processing/__init__.py @@ -7,5 +7,4 @@ from .tokenization import ( text_based_document_to_token_based, token_based_document_to_text_based, - tokenize_document, ) diff --git a/src/pie_modules/document/processing/tokenization.py b/src/pie_modules/document/processing/tokenization.py index d99da6fbd..c45b6ebc8 100644 --- a/src/pie_modules/document/processing/tokenization.py +++ b/src/pie_modules/document/processing/tokenization.py @@ -1,8 +1,6 @@ -import functools -import json import logging from collections import defaultdict -from copy import copy, deepcopy +from copy import deepcopy from typing import ( Callable, Dict, @@ -448,123 +446,3 @@ def token_based_document_to_text_based( added_annotations.setdefault(layer_name, {}).update(annotation_mapping) return result - - -def tokenize_document( - doc: TextBasedDocument, - tokenizer: Callable, - result_document_type: Type[ToD], - partition_layer: Optional[str] = None, - strip_spans: bool = False, - strict_span_conversion: bool = True, - added_annotations: Optional[List[Dict[str, Dict[Annotation, Annotation]]]] = None, - verbose: bool = True, - **tokenize_kwargs, -) -> List[ToD]: - """Tokenize a document with a given tokenizer and return a list of token based documents. 
The - document is tokenized in partitions if a partition layer is provided. The annotations that - target the text are converted to target the tokens and also all dependent annotations are - converted. - - Args: - doc (TextBasedDocument): The document to tokenize. - tokenizer (PreTrainedTokenizer): The tokenizer. - result_document_type (Type[ToD]): The exact type of the token based documents. - partition_layer (Optional[str], optional): The layer to use for partitioning the document. If None, the whole - document is tokenized. Defaults to None. - strip_spans (bool, optional): If True, strip the whitespace from the character spans before converting them to - token spans. Defaults to False. - strict_span_conversion (bool, optional): If True, raise an error if not all annotations can be converted to - token based documents. Defaults to True. - added_annotations (Optional[List[Dict[str, Dict[Annotation, Annotation]]]], optional): Pass an empty list to - collect the added annotations. Defaults to None. - verbose (bool, optional): If True, log warnings if annotations can not be converted. Defaults to True. - - Returns: - List[ToD]: The token based documents of type result_document_type with the converted annotations. 
- """ - - added_annotation_lists: Dict[str, List[Annotation]] = defaultdict(list) - result = [] - partitions: Iterable[Span] - if partition_layer is None: - partitions = [Span(start=0, end=len(doc.text))] - else: - partitions = doc[partition_layer] - for partition in partitions: - text = doc.text[partition.start : partition.end] - current_tokenize_kwargs = copy(tokenize_kwargs) - if "text" in tokenize_kwargs: - current_tokenize_kwargs["text_pair"] = text - sequence_index = 1 - else: - current_tokenize_kwargs["text"] = text - sequence_index = 0 - tokenized_text = tokenizer(**current_tokenize_kwargs) - for batch_encoding in tokenized_text.encodings: - token_offset_mapping = batch_encoding.offsets - char_to_token: Optional[Callable[[int], Optional[int]]] - char_to_token = functools.partial( - batch_encoding.char_to_token, sequence_index=sequence_index - ) - token_offset_mapping = [ - offsets if s_id == sequence_index else (0, 0) - for s_id, offsets in zip(batch_encoding.sequence_ids, token_offset_mapping) - ] - if partition.start > 0: - token_offset_mapping = [ - (start + partition.start, end + partition.start) - for start, end in token_offset_mapping - ] - char_to_token = None - current_added_annotations: Dict[str, Dict[Annotation, Annotation]] = defaultdict(dict) - tokenized_document = text_based_document_to_token_based( - doc, - tokens=batch_encoding.tokens, - result_document_type=result_document_type, - token_offset_mapping=token_offset_mapping, - char_to_token=char_to_token, - strict_span_conversion=False, - strip_spans=strip_spans, - verbose=False, - added_annotations=current_added_annotations, - ) - tokenized_document.metadata["tokenizer_encoding"] = batch_encoding - result.append(tokenized_document) - for k, v in current_added_annotations.items(): - added_annotation_lists[k].extend(v) - if added_annotations is not None: - added_annotations.append(current_added_annotations) - - missed_annotations = defaultdict(set) - if strict_span_conversion or verbose: - # We 
check the annotations with respect to the layers of the result_document_type. - # Note that the original document may have more layers, but since result documents - # are of type result_document_type, we only check the layers of this type. - for annotation_field in result_document_type.annotation_fields(): - # do not check the partition layer because the partitions are not required later on - # and entries get quite probably removed when windowing is applied, so this just pollutes the logs - if annotation_field.name != partition_layer: - current_missed_annotations = set(doc[annotation_field.name]) - set( - added_annotation_lists[annotation_field.name] - ) - if len(current_missed_annotations) > 0: - missed_annotations[annotation_field.name] = current_missed_annotations - - if len(missed_annotations) > 0: - missed_annotations_simplified = {k: str(v) for k, v in missed_annotations.items()} - if strict_span_conversion: - raise ValueError( - f"could not convert all annotations from document with id={doc.id} to token based documents, " - f"but strict_span_conversion is True, so raise an error, " - f"missed annotations:\n{json.dumps(missed_annotations_simplified, sort_keys=True, indent=2)}" - ) - else: - if verbose: - logger.warning( - f"could not convert all annotations from document with id={doc.id} to token based documents, " - f"missed annotations (disable this message with verbose=False):\n" - f"{json.dumps(missed_annotations_simplified, sort_keys=True, indent=2)}" - ) - - return result diff --git a/src/pie_modules/metrics/__init__.py b/src/pie_modules/metrics/__init__.py index 7038ade07..674d2ef0c 100644 --- a/src/pie_modules/metrics/__init__.py +++ b/src/pie_modules/metrics/__init__.py @@ -8,5 +8,4 @@ FieldLengthCollector, LabelCountCollector, SubFieldLengthCollector, - TokenCountCollector, ) diff --git a/src/pie_modules/metrics/relation_argument_distance_collector.py b/src/pie_modules/metrics/relation_argument_distance_collector.py index ff0cdeb1c..138d3ede2 
100644 --- a/src/pie_modules/metrics/relation_argument_distance_collector.py +++ b/src/pie_modules/metrics/relation_argument_distance_collector.py @@ -1,13 +1,9 @@ from collections import defaultdict -from typing import Any, Dict, List, Optional, Type, Union +from typing import Dict, List from pie_core import Document, DocumentStatistic -from pie_core.utils.hydra import resolve_target -from transformers import AutoTokenizer, PreTrainedTokenizer from pie_modules.annotations import BinaryRelation, NaryRelation, Span -from pie_modules.document.processing import tokenize_document -from pie_modules.documents import TextBasedDocument, TokenBasedDocument from pie_modules.utils.span import distance @@ -33,10 +29,6 @@ def __init__( self, layer: str, distance_type: str = "outer", - tokenize: bool = False, - tokenize_kwargs: Optional[Dict[str, Any]] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - tokenized_document_type: Optional[Union[str, Type[TokenBasedDocument]]] = None, key_all: str = "ALL", **kwargs, ): @@ -44,38 +36,9 @@ def __init__( self.layer = layer self.distance_type = distance_type self.key_all = key_all - self.tokenize = tokenize - self.tokenize_kwargs = tokenize_kwargs or {} - if self.tokenize: - if tokenizer is None: - raise ValueError( - "tokenizer must be provided to calculate distance in means of tokens" - ) - if isinstance(tokenizer, str): - tokenizer = AutoTokenizer.from_pretrained(tokenizer) - self.tokenizer = tokenizer - if tokenized_document_type is None: - raise ValueError( - "tokenized_document_type must be provided to calculate distance in means of tokens" - ) - self.tokenized_document_type: Type[TokenBasedDocument] = resolve_target( - tokenized_document_type - ) def _collect(self, doc: Document) -> Dict[str, List[float]]: - if self.tokenize: - if not isinstance(doc, TextBasedDocument): - raise ValueError( - "doc must be a TextBasedDocument to calculate distance in means of tokens" - ) - docs = tokenize_document( - doc, - 
tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - **self.tokenize_kwargs, - ) - else: - docs = [doc] + docs = [doc] values: Dict[str, List[float]] = defaultdict(list) for doc in docs: layer_obj = getattr(doc, self.layer) diff --git a/src/pie_modules/metrics/span_coverage_collector.py b/src/pie_modules/metrics/span_coverage_collector.py index df783f228..eb9338386 100644 --- a/src/pie_modules/metrics/span_coverage_collector.py +++ b/src/pie_modules/metrics/span_coverage_collector.py @@ -1,13 +1,9 @@ import logging -from typing import Any, Dict, List, Optional, Set, Type, Union +from typing import List, Optional, Set, Union from pie_core import Document, DocumentStatistic -from pie_core.utils.hydra import resolve_type -from transformers import AutoTokenizer, PreTrainedTokenizer from pie_modules.annotations import LabeledMultiSpan, Span -from pie_modules.document.processing import tokenize_document -from pie_modules.documents import TextBasedDocument, TokenBasedDocument logger = logging.getLogger(__name__) @@ -36,57 +32,16 @@ class SpanCoverageCollector(DocumentStatistic): def __init__( self, layer: str, - tokenize: bool = False, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - tokenized_document_type: Optional[Union[str, Type[TokenBasedDocument]]] = None, labels: Optional[Union[List[str], str]] = None, label_attribute: str = "label", - tokenize_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ): super().__init__(**kwargs) self.layer = layer self.labels = labels self.label_field = label_attribute - self.tokenize = tokenize - if self.tokenize: - if tokenizer is None: - raise ValueError( - "tokenizer must be provided to calculate the span coverage in means of tokens" - ) - if isinstance(tokenizer, str): - tokenizer = AutoTokenizer.from_pretrained(tokenizer) - self.tokenizer = tokenizer - if tokenized_document_type is None: - raise ValueError( - "tokenized_document_type must be provided to calculate the span coverage in 
means of tokens" - ) - self.tokenized_document_type = resolve_type( - tokenized_document_type, expected_super_type=TokenBasedDocument - ) - self.tokenize_kwargs = tokenize_kwargs or {} def _collect(self, doc: Document) -> float: - docs: Union[List[Document], List[TokenBasedDocument]] - if self.tokenize: - if not isinstance(doc, TextBasedDocument): - raise ValueError( - "doc must be a TextBasedDocument to calculate the span coverage in means of tokens" - ) - docs = tokenize_document( - doc, - tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - **self.tokenize_kwargs, - ) - if len(docs) != 1: - raise ValueError( - "tokenization of a single document must result in a single document to calculate the " - "span coverage correctly. Please check your tokenization settings, especially that " - "no windowing is applied because of max input length restrictions." - ) - doc = docs[0] - layer_obj = getattr(doc, self.layer) target = layer_obj.target covered_indices: Set[int] = set() diff --git a/src/pie_modules/metrics/span_length_collector.py b/src/pie_modules/metrics/span_length_collector.py index 66b0a8db7..649a8e801 100644 --- a/src/pie_modules/metrics/span_length_collector.py +++ b/src/pie_modules/metrics/span_length_collector.py @@ -1,14 +1,10 @@ import logging from collections import defaultdict -from typing import Any, Dict, List, Optional, Type, Union +from typing import Dict, List, Optional, Union from pie_core import Document, DocumentStatistic -from pie_core.utils.hydra import resolve_type -from transformers import AutoTokenizer, PreTrainedTokenizer from pie_modules.annotations import Span -from pie_modules.document.processing import tokenize_document -from pie_modules.documents import TextBasedDocument, TokenBasedDocument logger = logging.getLogger(__name__) @@ -26,12 +22,8 @@ class SpanLengthCollector(DocumentStatistic): def __init__( self, layer: str, - tokenize: bool = False, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = 
None, - tokenized_document_type: Optional[Union[str, Type[TokenBasedDocument]]] = None, labels: Optional[Union[List[str], str]] = None, label_attribute: str = "label", - tokenize_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ): super().__init__(**kwargs) @@ -40,39 +32,9 @@ def __init__( raise ValueError("labels must be a list of strings or 'INFERRED'") self.labels = labels self.label_field = label_attribute - self.tokenize = tokenize - if self.tokenize: - if tokenizer is None: - raise ValueError( - "tokenizer must be provided to calculate the span length in means of tokens" - ) - if isinstance(tokenizer, str): - tokenizer = AutoTokenizer.from_pretrained(tokenizer) - self.tokenizer = tokenizer - if tokenized_document_type is None: - raise ValueError( - "tokenized_document_type must be provided to calculate the span length in means of tokens" - ) - self.tokenized_document_type = resolve_type( - tokenized_document_type, expected_super_type=TokenBasedDocument - ) - self.tokenize_kwargs = tokenize_kwargs or {} def _collect(self, doc: Document) -> Union[List[int], Dict[str, List[int]]]: - docs: Union[List[Document], List[TokenBasedDocument]] - if self.tokenize: - if not isinstance(doc, TextBasedDocument): - raise ValueError( - "doc must be a TextBasedDocument to calculate the span length in means of tokens" - ) - docs = tokenize_document( - doc, - tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - **self.tokenize_kwargs, - ) - else: - docs = [doc] + docs = [doc] values: Dict[str, List[int]] if isinstance(self.labels, str): diff --git a/src/pie_modules/metrics/statistics.py b/src/pie_modules/metrics/statistics.py index 16b9d41d3..9802b6356 100644 --- a/src/pie_modules/metrics/statistics.py +++ b/src/pie_modules/metrics/statistics.py @@ -1,46 +1,12 @@ import logging from collections import defaultdict -from typing import Any, Callable, Dict, List, Optional, Type, Union +from typing import Any, Callable, Dict, List, Union from pie_core 
import Document, DocumentStatistic -from transformers import AutoTokenizer, PreTrainedTokenizer - -from pie_modules.documents import TextBasedDocument logger = logging.getLogger(__name__) -class TokenCountCollector(DocumentStatistic): - """Collects the token count of a field when tokenizing its content with a Huggingface - tokenizer. - - The content of the field should be a string. - """ - - def __init__( - self, - tokenizer: Union[str, PreTrainedTokenizer], - text_field: str = "text", - tokenizer_kwargs: Optional[Dict[str, Any]] = None, - document_type: Optional[Type[Document]] = None, - **kwargs, - ): - if document_type is None and text_field == "text": - document_type = TextBasedDocument - super().__init__(document_type=document_type, **kwargs) - self.tokenizer = ( - AutoTokenizer.from_pretrained(tokenizer) if isinstance(tokenizer, str) else tokenizer - ) - self.tokenizer_kwargs = tokenizer_kwargs or {} - self.text_field = text_field - - def _collect(self, doc: Document) -> int: - text = getattr(doc, self.text_field) - encodings = self.tokenizer(text, **self.tokenizer_kwargs) - tokens = encodings.tokens() - return len(tokens) - - class FieldLengthCollector(DocumentStatistic): """Collects the length of a field, e.g. to collect the number the characters in the input text. 
diff --git a/src/pie_modules/utils/tokenization.py b/src/pie_modules/utils/tokenization.py deleted file mode 100644 index 85e89d448..000000000 --- a/src/pie_modules/utils/tokenization.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import TypeVar - -from transformers import BatchEncoding - -from pie_modules.annotations import Span - -S = TypeVar("S", bound=Span) - - -class SpanNotAlignedWithTokenException(Exception): - def __init__(self, span): - self.span = span - - -def get_aligned_token_span(encoding: BatchEncoding, char_span: S) -> S: - # find the start - token_start = None - token_end_before = None - char_start = None - for idx in range(char_span.start, char_span.end): - token_start = encoding.char_to_token(idx) - if token_start is not None: - char_start = idx - break - - if char_start is None: - raise SpanNotAlignedWithTokenException(span=char_span) - for idx in range(char_span.end - 1, char_start - 1, -1): - token_end_before = encoding.char_to_token(idx) - if token_end_before is not None: - break - - if token_start is None or token_end_before is None: - raise SpanNotAlignedWithTokenException(span=char_span) - - return char_span.copy(start=token_start, end=token_end_before + 1) diff --git a/tests/document/processing/test_tokenization.py b/tests/document/processing/test_tokenization.py index 969fa06ed..c6fdacd17 100644 --- a/tests/document/processing/test_tokenization.py +++ b/tests/document/processing/test_tokenization.py @@ -4,7 +4,6 @@ import pytest from pie_core import Annotation, AnnotationLayer, Document, annotation_field -from transformers import AutoTokenizer, PreTrainedTokenizer from pie_modules.annotations import ( BinaryRelation, @@ -16,7 +15,6 @@ from pie_modules.document.processing import ( text_based_document_to_token_based, token_based_document_to_text_based, - tokenize_document, ) from pie_modules.document.processing.tokenization import find_token_offset_mapping from pie_modules.documents import TextBasedDocument, TokenBasedDocument @@ -229,11 
+227,6 @@ def _test_token_document_with_multi_spans(doc): ] -@pytest.fixture(scope="module") -def tokenizer() -> PreTrainedTokenizer: - return AutoTokenizer.from_pretrained("bert-base-cased") - - def test_find_token_offset_mapping(text_document, token_document): token_offset_mapping = find_token_offset_mapping( text=text_document.text, tokens=list(token_document.tokens) @@ -652,368 +645,3 @@ class WrongAnnotationType(TokenBasedDocument): == "can not convert layers that target the tokens but contain non-span annotations, " "but found " ) - - -def test_tokenize_document(text_document, tokenizer): - added_annotations = [] - tokenized_docs = tokenize_document( - text_document, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - added_annotations=added_annotations, - ) - assert len(tokenized_docs) == 1 - tokenized_doc = tokenized_docs[0] - - # check (de-)serialization - tokenized_doc.copy() - - assert ( - tokenized_doc.metadata["text"] - == text_document.text - == "First sentence. Entity M works at N. And it founded O." 
- ) - assert tokenized_doc.tokens == ( - "[CLS]", - "First", - "sentence", - ".", - "En", - "##ti", - "##ty", - "M", - "works", - "at", - "N", - ".", - "And", - "it", - "founded", - "O", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == len(text_document.sentences) == 3 - sentences = [str(sentence) for sentence in tokenized_doc.sentences] - assert sentences == [ - "('First', 'sentence', '.')", - "('En', '##ti', '##ty', 'M', 'works', 'at', 'N', '.')", - "('And', 'it', 'founded', 'O', '.')", - ] - assert len(tokenized_doc.entities) == len(text_document.entities) == 4 - entities = [str(entity) for entity in tokenized_doc.entities] - assert entities == ["('En', '##ti', '##ty', 'M')", "('N',)", "('it',)", "('O',)"] - assert len(tokenized_doc.relations) == len(text_document.relations) == 2 - relation_tuples = [ - (str(rel.head), rel.label, str(rel.tail)) for rel in tokenized_doc.relations - ] - assert relation_tuples == [ - ("('En', '##ti', '##ty', 'M')", "per:employee_of", "('N',)"), - ("('it',)", "per:founder", "('O',)"), - ] - - assert len(added_annotations) == 1 - first_added_annotations = added_annotations[0] - _assert_added_annotations(text_document, tokenized_doc, first_added_annotations) - - -def test_tokenize_document_max_length(text_document, tokenizer, caplog): - added_annotations = [] - caplog.clear() - with caplog.at_level("WARNING"): - tokenized_docs = tokenize_document( - text_document, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - # max_length is set to 10, so the document is split into two parts - strict_span_conversion=False, - max_length=10, - return_overflowing_tokens=True, - added_annotations=added_annotations, - ) - assert len(caplog.records) == 1 - assert ( - caplog.records[0].message - == "could not convert all annotations from document with id=None to token based documents, missed annotations " - "(disable this message with verbose=False):\n" - "{\n" - ' "relations": 
"{BinaryRelation(head=LabeledSpan(start=16, end=24, label=\'per\', score=1.0), ' - "tail=LabeledSpan(start=34, end=35, label='org', score=1.0), label='per:employee_of', score=1.0)}\",\n" - ' "sentences": "{Span(start=16, end=36)}"\n' - "}" - ) - assert len(tokenized_docs) == 2 - assert len(added_annotations) == 2 - tokenized_doc = tokenized_docs[0] - - # check (de-)serialization - tokenized_doc.copy() - - assert ( - tokenized_doc.metadata["text"] - == text_document.text - == "First sentence. Entity M works at N. And it founded O." - ) - assert tokenized_doc.tokens == ( - "[CLS]", - "First", - "sentence", - ".", - "En", - "##ti", - "##ty", - "M", - "works", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == 1 - sentences = [str(sentence) for sentence in tokenized_doc.sentences] - assert sentences == ["('First', 'sentence', '.')"] - assert len(tokenized_doc.entities) == 1 - entities = [str(entity) for entity in tokenized_doc.entities] - assert entities == ["('En', '##ti', '##ty', 'M')"] - assert len(tokenized_doc.relations) == 0 - # check annotation mapping - current_added_annotations = added_annotations[0] - # no relations are added in the first tokenized document - assert set(current_added_annotations) == {"sentences", "entities"} - # check sentences - sentence_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["sentences"].items() - } - assert sentence_mapping == {"First sentence.": ("First", "sentence", ".")} - # check entities - entity_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["entities"].items() - } - assert entity_mapping == {("per", "Entity M"): ("per", ("En", "##ti", "##ty", "M"))} - - tokenized_doc = tokenized_docs[1] - - # check (de-)serialization - tokenized_doc.copy() - - assert ( - tokenized_doc.metadata["text"] - == text_document.text - == "First sentence. Entity M works at N. And it founded O." 
- ) - assert tokenized_doc.tokens == ( - "[CLS]", - "at", - "N", - ".", - "And", - "it", - "founded", - "O", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == 1 - sentences = [str(sentence) for sentence in tokenized_doc.sentences] - assert sentences == ["('And', 'it', 'founded', 'O', '.')"] - assert len(tokenized_doc.entities) == 3 - entities = [str(entity) for entity in tokenized_doc.entities] - assert entities == ["('N',)", "('it',)", "('O',)"] - assert len(tokenized_doc.relations) == 1 - relation_tuples = [ - (str(rel.head), rel.label, str(rel.tail)) for rel in tokenized_doc.relations - ] - assert relation_tuples == [("('it',)", "per:founder", "('O',)")] - # check annotation mapping - current_added_annotations = added_annotations[1] - assert set(current_added_annotations) == {"sentences", "entities", "relations"} - # check sentences - sentence_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["sentences"].items() - } - assert sentence_mapping == {"And it founded O.": ("And", "it", "founded", "O", ".")} - # check entities - entity_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["entities"].items() - } - assert entity_mapping == { - ("org", "N"): ("org", ("N",)), - ("per", "it"): ("per", ("it",)), - ("org", "O"): ("org", ("O",)), - } - # check relations - relation_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["relations"].items() - } - assert relation_mapping == { - ("per:founder", (("per", "it"), ("org", "O"))): ( - "per:founder", - (("per", ("it",)), ("org", ("O",))), - ) - } - - -def test_tokenize_document_max_length_strict(text_document, tokenizer): - with pytest.raises(ValueError) as excinfo: - tokenize_document( - text_document, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - # max_length is set to 10, so the document is split into two parts - strict_span_conversion=True, - max_length=10, - return_overflowing_tokens=True, - ) - assert ( 
- str(excinfo.value) - == "could not convert all annotations from document with id=None to token based documents, " - "but strict_span_conversion is True, so raise an error, missed annotations:\n" - "{\n" - ' "relations": "{BinaryRelation(head=LabeledSpan(start=16, end=24, label=\'per\', score=1.0), ' - "tail=LabeledSpan(start=34, end=35, label='org', score=1.0), label='per:employee_of', score=1.0)}\",\n" - ' "sentences": "{Span(start=16, end=36)}"\n' - "}" - ) - - -def test_tokenize_document_partition(text_document, tokenizer): - added_annotations = [] - tokenized_docs = tokenize_document( - text_document, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - partition_layer="sentences", - added_annotations=added_annotations, - ) - assert len(tokenized_docs) == 3 - assert len(added_annotations) == 3 - tokenized_doc = tokenized_docs[0] - - # check (de-)serialization - tokenized_doc.copy() - - assert ( - tokenized_doc.metadata["text"] - == text_document.text - == "First sentence. Entity M works at N. And it founded O." - ) - assert tokenized_doc.tokens == ("[CLS]", "First", "sentence", ".", "[SEP]") - assert len(tokenized_doc.sentences) == 1 - assert len(tokenized_doc.entities) == 0 - assert len(tokenized_doc.relations) == 0 - - # check annotation mapping - current_added_annotations = added_annotations[0] - assert set(current_added_annotations) == {"sentences"} - # check sentences - sentence_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["sentences"].items() - } - assert sentence_mapping == {"First sentence.": ("First", "sentence", ".")} - - tokenized_doc = tokenized_docs[1] - - # check (de-)serialization - tokenized_doc.copy() - - assert ( - tokenized_doc.metadata["text"] - == text_document.text - == "First sentence. Entity M works at N. And it founded O." 
- ) - assert tokenized_doc.tokens == ( - "[CLS]", - "En", - "##ti", - "##ty", - "M", - "works", - "at", - "N", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == 1 - sentences = [str(sentence) for sentence in tokenized_doc.sentences] - assert sentences == ["('En', '##ti', '##ty', 'M', 'works', 'at', 'N', '.')"] - assert len(tokenized_doc.entities) == 2 - entities = [str(entity) for entity in tokenized_doc.entities] - assert entities == ["('En', '##ti', '##ty', 'M')", "('N',)"] - assert len(tokenized_doc.relations) == 1 - relation_tuples = [ - (str(rel.head), rel.label, str(rel.tail)) for rel in tokenized_doc.relations - ] - assert relation_tuples == [("('En', '##ti', '##ty', 'M')", "per:employee_of", "('N',)")] - - # check annotation mapping - current_added_annotations = added_annotations[1] - assert set(current_added_annotations) == {"sentences", "entities", "relations"} - # check sentences - sentence_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["sentences"].items() - } - assert sentence_mapping == { - "Entity M works at N.": ("En", "##ti", "##ty", "M", "works", "at", "N", ".") - } - # check entities - entity_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["entities"].items() - } - assert entity_mapping == { - ("per", "Entity M"): ("per", ("En", "##ti", "##ty", "M")), - ("org", "N"): ("org", ("N",)), - } - # check relations - relation_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["relations"].items() - } - assert relation_mapping == { - ("per:employee_of", (("per", "Entity M"), ("org", "N"))): ( - "per:employee_of", - (("per", ("En", "##ti", "##ty", "M")), ("org", ("N",))), - ) - } - - tokenized_doc = tokenized_docs[2] - - # check (de-)serialization - tokenized_doc.copy() - - assert ( - tokenized_doc.metadata["text"] - == text_document.text - == "First sentence. Entity M works at N. And it founded O." 
- ) - assert tokenized_doc.tokens == ("[CLS]", "And", "it", "founded", "O", ".", "[SEP]") - assert len(tokenized_doc.sentences) == 1 - sentences = [str(sentence) for sentence in tokenized_doc.sentences] - assert sentences == ["('And', 'it', 'founded', 'O', '.')"] - assert len(tokenized_doc.entities) == 2 - entities = [str(entity) for entity in tokenized_doc.entities] - assert entities == ["('it',)", "('O',)"] - assert len(tokenized_doc.relations) == 1 - relation_tuples = [ - (str(rel.head), rel.label, str(rel.tail)) for rel in tokenized_doc.relations - ] - assert relation_tuples == [("('it',)", "per:founder", "('O',)")] - - # check annotation mapping - current_added_annotations = added_annotations[2] - assert set(current_added_annotations) == {"sentences", "entities", "relations"} - # check sentences - sentence_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["sentences"].items() - } - assert sentence_mapping == {"And it founded O.": ("And", "it", "founded", "O", ".")} - # check entities - entity_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["entities"].items() - } - assert entity_mapping == {("per", "it"): ("per", ("it",)), ("org", "O"): ("org", ("O",))} - # check relations - relation_mapping = { - k.resolve(): v.resolve() for k, v in current_added_annotations["relations"].items() - } - assert relation_mapping == { - ("per:founder", (("per", "it"), ("org", "O"))): ( - "per:founder", - (("per", ("it",)), ("org", ("O",))), - ) - } diff --git a/tests/metrics/test_relation_argument_distance_collector.py b/tests/metrics/test_relation_argument_distance_collector.py index f51d6e819..2dc05ce90 100644 --- a/tests/metrics/test_relation_argument_distance_collector.py +++ b/tests/metrics/test_relation_argument_distance_collector.py @@ -82,107 +82,7 @@ def test_relation_argument_distance_collector_with_n_ary_relation(): } -def test_relation_argument_distance_collector_with_tokenize(): - doc = TestDocument( - text="This 
is the first entity. This is the second entity. And, this is the third entity." - ) - - doc.entities.append(LabeledSpan(start=0, end=25, label="entity")) - doc.entities.append(LabeledSpan(start=26, end=52, label="entity")) - doc.entities.append(LabeledSpan(start=53, end=83, label="entity")) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="relation_label_1") - ) - doc.relations.append( - BinaryRelation(head=doc.entities[1], tail=doc.entities[2], label="relation_label_2") - ) - - @dataclasses.dataclass - class TokenizedTestDocument(TokenBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="tokens") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - - statistic = RelationArgumentDistanceCollector( - layer="relations", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenizedTestDocument, - ) - values = statistic(doc) - assert values == { - "ALL": {"len": 4, "mean": 13.0, "std": 1.0, "min": 12.0, "max": 14.0}, - "relation_label_1": {"len": 2, "mean": 12.0, "std": 0.0, "min": 12.0, "max": 12.0}, - "relation_label_2": {"len": 2, "mean": 14.0, "std": 0.0, "min": 14.0, "max": 14.0}, - } - - -def test_relation_argument_distance_collector_with_tokenize_missing_tokenizer(): - with pytest.raises(ValueError) as excinfo: - RelationArgumentDistanceCollector( - layer="relations", - tokenize=True, - tokenized_document_type=TokenBasedDocument, - ) - assert ( - str(excinfo.value) == "tokenizer must be provided to calculate distance in means of tokens" - ) - - -def test_relation_argument_distance_collector_with_tokenize_missing_tokenized_document_type(): - with pytest.raises(ValueError) as excinfo: - RelationArgumentDistanceCollector( - layer="relations", - tokenize=True, - tokenizer="bert-base-uncased", - ) - assert ( - str(excinfo.value) - == "tokenized_document_type must be provided to calculate distance in means of tokens" - ) - - -def 
test_relation_argument_distance_collector_with_tokenize_wrong_document_type(): - @dataclasses.dataclass - class TestDocument(Document): - data: str - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="data") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - - doc = TestDocument( - data="This is the first entity. This is the second entity. This is the third entity." - ) - - doc.entities.append(LabeledSpan(start=0, end=25, label="entity")) - doc.entities.append(LabeledSpan(start=26, end=52, label="entity")) - doc.entities.append(LabeledSpan(start=53, end=78, label="entity")) - doc.relations.append( - BinaryRelation(head=doc.entities[0], tail=doc.entities[1], label="relation_label_1") - ) - doc.relations.append( - BinaryRelation(head=doc.entities[1], tail=doc.entities[2], label="relation_label_2") - ) - - @dataclasses.dataclass - class TokenizedTestDocument(TokenBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="tokens") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="entities") - - statistic = RelationArgumentDistanceCollector( - layer="relations", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenizedTestDocument, - ) - - with pytest.raises(ValueError) as excinfo: - statistic(doc) - assert ( - str(excinfo.value) - == "doc must be a TextBasedDocument to calculate distance in means of tokens" - ) - - -def test_relation_argument_distance_collector_with_tokenize_wrong_span_annotation_type(): +def test_relation_argument_distance_collector_with_wrong_span_annotation_type(): @dataclasses.dataclass(eq=True, frozen=True) class UnknownSpan(Annotation): start: int @@ -216,7 +116,7 @@ class TestDocument(TextBasedDocument): ) -def test_relation_argument_distance_collector_with_tokenize_wrong_relation_annotation_type(): +def test_relation_argument_distance_collector_with_wrong_relation_annotation_type(): @dataclasses.dataclass(eq=True, 
frozen=True) class UnknownRelation(Annotation): head: Annotation diff --git a/tests/metrics/test_span_coverage_collector.py b/tests/metrics/test_span_coverage_collector.py index 026ec28b7..109352df3 100644 --- a/tests/metrics/test_span_coverage_collector.py +++ b/tests/metrics/test_span_coverage_collector.py @@ -1,10 +1,10 @@ import dataclasses import pytest -from pie_core import Annotation, AnnotationLayer, Document, annotation_field +from pie_core import Annotation, AnnotationLayer, annotation_field from pie_modules.annotations import LabeledMultiSpan, LabeledSpan -from pie_modules.documents import TextBasedDocument, TokenBasedDocument +from pie_modules.documents import TextBasedDocument from pie_modules.metrics import SpanCoverageCollector @@ -53,85 +53,7 @@ def test_span_coverage_collector_with_labels(): assert values == {"len": 1, "max": 0.125, "mean": 0.125, "min": 0.125, "std": 0.0} -def test_span_coverage_collector_with_tokenize(): - doc = TestDocument(text="A and O.") - doc.entities.append(LabeledSpan(start=0, end=1, label="entity")) - doc.entities.append(LabeledSpan(start=6, end=7, label="entity")) - - @dataclasses.dataclass - class TokenizedTestDocument(TokenBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="tokens") - - statistic = SpanCoverageCollector( - layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenizedTestDocument, - ) - values = statistic(doc) - assert values == { - "len": 1, - "max": 0.3333333333333333, - "mean": 0.3333333333333333, - "min": 0.3333333333333333, - "std": 0.0, - } - - -def test_span_coverage_collector_with_tokenize_missing_tokenizer(): - with pytest.raises(ValueError) as excinfo: - SpanCoverageCollector( - layer="entities", - tokenize=True, - tokenized_document_type=TokenBasedDocument, - ) - assert ( - str(excinfo.value) - == "tokenizer must be provided to calculate the span coverage in means of tokens" - ) - - -def 
test_span_coverage_collector_with_tokenize_missing_tokenized_document_type(): - with pytest.raises(ValueError) as excinfo: - SpanCoverageCollector( - layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - ) - assert ( - str(excinfo.value) - == "tokenized_document_type must be provided to calculate the span coverage in means of tokens" - ) - - -def test_span_coverage_collector_with_tokenize_wrong_document_type(): - @dataclasses.dataclass - class TestDocument(Document): - data: str - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="data") - - doc = TestDocument(data="A and O") - - @dataclasses.dataclass - class TokenizedTestDocument(TokenBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="tokens") - - statistic = SpanCoverageCollector( - layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenizedTestDocument, - ) - - with pytest.raises(ValueError) as excinfo: - statistic(doc) - assert ( - str(excinfo.value) - == "doc must be a TextBasedDocument to calculate the span coverage in means of tokens" - ) - - -def test_span_coverage_collector_with_tokenize_wrong_annotation_type(): +def test_span_coverage_collector_with_wrong_annotation_type(): @dataclasses.dataclass(eq=True, frozen=True) class UnknownSpan(Annotation): start: int diff --git a/tests/metrics/test_span_length_collector.py b/tests/metrics/test_span_length_collector.py index 9a071c04e..f482b1066 100644 --- a/tests/metrics/test_span_length_collector.py +++ b/tests/metrics/test_span_length_collector.py @@ -78,83 +78,7 @@ def test_span_length_collector_wrong_label_value(): assert str(excinfo.value) == "labels must be a list of strings or 'INFERRED'" -def test_span_length_collector_with_tokenize(documents): - @dataclasses.dataclass - class TokenizedTestDocument(TokenBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="tokens") - - statistic = SpanLengthCollector( - 
layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenizedTestDocument, - ) - values = statistic(documents) - assert values == { - "len": 7, - "max": 3, - "mean": 1.8571428571428572, - "min": 1, - "std": 0.8329931278350429, - } - - -def test_span_length_collector_with_tokenize_missing_tokenizer(): - with pytest.raises(ValueError) as excinfo: - SpanLengthCollector( - layer="entities", - tokenize=True, - tokenized_document_type=TokenBasedDocument, - ) - assert ( - str(excinfo.value) - == "tokenizer must be provided to calculate the span length in means of tokens" - ) - - -def test_span_length_collector_with_tokenize_missing_tokenized_document_type(): - with pytest.raises(ValueError) as excinfo: - SpanLengthCollector( - layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - ) - assert ( - str(excinfo.value) - == "tokenized_document_type must be provided to calculate the span length in means of tokens" - ) - - -def test_span_length_collector_with_tokenize_wrong_document_type(): - @dataclasses.dataclass - class TestDocument(Document): - data: str - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="data") - - doc = TestDocument(data="First sentence. Entity M works at N. 
And it founded O.") - doc.entities.append(LabeledSpan(start=16, end=24, label="per")) - assert str(doc.entities[0]) == "Entity M" - - @dataclasses.dataclass - class TokenizedTestDocument(TokenBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="tokens") - - statistic = SpanLengthCollector( - layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenizedTestDocument, - ) - - with pytest.raises(ValueError) as excinfo: - statistic(doc) - assert ( - str(excinfo.value) - == "doc must be a TextBasedDocument to calculate the span length in means of tokens" - ) - - -def test_span_length_collector_with_tokenize_wrong_annotation_type(): +def test_span_length_collector_with_wrong_annotation_type(): @dataclasses.dataclass class TestDocument(TextBasedDocument): label: AnnotationLayer[Label] = annotation_field() diff --git a/tests/metrics/test_statistics.py b/tests/metrics/test_statistics.py index 4d1325795..951dbd57c 100644 --- a/tests/metrics/test_statistics.py +++ b/tests/metrics/test_statistics.py @@ -3,7 +3,6 @@ FieldLengthCollector, LabelCountCollector, SubFieldLengthCollector, - TokenCountCollector, ) @@ -71,17 +70,3 @@ def test_statistics(document_dataset): "val": {"mean": 3.0, "std": 0.0, "min": 3, "max": 3}, "train": {"mean": 3.0, "std": 0.0, "min": 3, "max": 3}, } - - -def test_statistics_with_tokenize(document_dataset): - statistic = TokenCountCollector( - text_field="text", - tokenizer="bert-base-uncased", - tokenizer_kwargs=dict(add_special_tokens=False), - ) - values = statistic(document_dataset) - assert values == { - "test": {"max": 13, "mean": 8.5, "min": 4, "std": 4.5}, - "train": {"max": 14, "mean": 8.285714285714286, "min": 4, "std": 3.5742845723419436}, - "val": {"max": 13, "mean": 8.5, "min": 4, "std": 4.5}, - } diff --git a/tests/utils/test_tokenization.py b/tests/utils/test_tokenization.py deleted file mode 100644 index 24aa94333..000000000 --- a/tests/utils/test_tokenization.py +++ 
/dev/null @@ -1,55 +0,0 @@ -import pytest -from transformers import AutoTokenizer - -from pie_modules.annotations import Span -from pie_modules.utils.tokenization import ( - SpanNotAlignedWithTokenException, - get_aligned_token_span, -) - - -def test_get_aligned_token_span(): - tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") - - text = "Hello, world!" - encoding = tokenizer(text) - tokens = tokenizer.convert_ids_to_tokens(encoding.input_ids) - assert tokens == ["[CLS]", "Hello", ",", "world", "!", "[SEP]"] - - # already aligned - char_span = Span(0, 5) - assert text[char_span.start : char_span.end] == "Hello" - token_span = get_aligned_token_span(encoding=encoding, char_span=char_span) - assert tokens[token_span.start : token_span.end] == ["Hello"] - - # end not aligned - char_span = Span(5, 7) - assert text[char_span.start : char_span.end] == ", " - token_span = get_aligned_token_span(encoding=encoding, char_span=char_span) - assert tokens[token_span.start : token_span.end] == [","] - - # start not aligned - char_span = Span(6, 12) - assert text[char_span.start : char_span.end] == " world" - token_span = get_aligned_token_span(encoding=encoding, char_span=char_span) - assert tokens[token_span.start : token_span.end] == ["world"] - - # start not aligned, end inside token - char_span = Span(6, 8) - assert text[char_span.start : char_span.end] == " w" - token_span = get_aligned_token_span(encoding=encoding, char_span=char_span) - assert tokens[token_span.start : token_span.end] == ["world"] - - # empty char span - char_span = Span(2, 2) - assert text[char_span.start : char_span.end] == "" - with pytest.raises(SpanNotAlignedWithTokenException) as e: - get_aligned_token_span(encoding=encoding, char_span=char_span) - assert e.value.span == char_span - - # empty token span - char_span = Span(6, 7) - assert text[char_span.start : char_span.end] == " " - with pytest.raises(SpanNotAlignedWithTokenException) as e: - get_aligned_token_span(encoding=encoding, 
char_span=char_span) - assert e.value.span == char_span From 727677f2f858fd26fb0e4af07e6f6d1501229ac0 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sun, 27 Apr 2025 18:15:59 +0200 Subject: [PATCH 05/11] adjust README.md --- README.md | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index d45e7afa1..2f3b7d2fd 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # pie-modules -PyTorch -Lightning -PyTorch-IE
+PythonIE
[![PyPI](https://img.shields.io/pypi/v/pie-modules.svg)][pypi status] [![Tests](https://github.com/arnebinder/pie-modules/workflows/Tests/badge.svg)][tests] @@ -10,31 +8,16 @@ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit] [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black] -Model-, taskmodule-, and metric-implementations as well as document processing utilities for [PyTorch-IE](https://github.com/ChristophAlt/pytorch-ie). +Annotation-, document- and metric implementations as well as utilities for [Python-IE](https://github.com/ArneBinder/pie-core). -Available models: +Available annotation types: see [here](src/pie_modules/annotations.py). -- [SimpleSequenceClassificationModel](src/pie_modules/models/simple_sequence_classification.py) -- [SequenceClassificationModelWithPooler](src/pie_modules/models/sequence_classification_with_pooler.py) -- [SequencePairSimilarityModelWithPooler](src/pie_modules/models/sequence_classification_with_pooler.py) -- [SimpleTokenClassificationModel](src/pie_modules/models/simple_token_classification.py) -- [TokenClassificationModelWithSeq2SeqEncoderAndCrf](src/pie_modules/models/token_classification_with_seq2seq_encoder_and_crf.py) -- [SimpleExtractiveQuestionAnsweringModel](src/pie_modules/models/simple_extractive_question_answering.py) -- [SimpleGenerativeModel](src/pie_modules/models/simple_generative.py) -- [SpanTupleClassificationModel](src/pie_modules/models/span_tuple_classification.py) - -Available taskmodules: - -- [RETextClassificationWithIndicesTaskModule](src/pie_modules/taskmodules/re_text_classification_with_indices.py) -- [CrossTextBinaryCorefTaskModule](src/pie_modules/taskmodules/cross_text_binary_coref.py) -- [LabeledSpanExtractionByTokenClassificationTaskModule](src/pie_modules/taskmodules/labeled_span_extraction_by_token_classification.py) -- 
[ExtractiveQuestionAnsweringTaskModule](src/pie_modules/taskmodules/extractive_question_answering.py) -- [TextToTextTaskModule](src/pie_modules/taskmodules/text_to_text.py) -- [PointerNetworkTaskModuleForEnd2EndRE](src/pie_modules/taskmodules/pointer_network_for_end2end_re.py) -- [RESpanPairClassificationTaskModule](src/pie_modules/taskmodules/re_span_pair_classification.py) +Available document types: see [here](src/pie_modules/documents.py). Available metrics: +- [F1Metric](src/pie_modules/metrics/f1.py) +- [ConfusionMatrix](src/pie_modules/metrics/confusion_matrix.py) - [SpanLengthCollector](src/pie_modules/metrics/span_length_collector.py) - [RelationArgumentDistanceCollector](src/pie_modules/metrics/relation_argument_distance_collector.py) - [SpanCoverageCollector](src/pie_modules/metrics/span_coverage_collector.py) @@ -48,7 +31,7 @@ Document processing utilities: - [RelationArgumentSorter](src/pie_modules/document/processing/relation_argument_sorter.py) - [SentenceSplitter](src/pie_modules/document/processing/sentence_splitter.py) - [TextSpanTrimmer](src/pie_modules/document/processing/text_span_trimmer.py) -- [tokenize_document](src/pie_modules/document/processing/tokenization.py) +- [tokenization utils](src/pie_modules/document/processing/tokenization.py), e.g., `text_based_document_to_token_based` and `token_based_document_to_text_based` ## Setup From 60b25c8050b63ed28ec73a776603ea60b88a3dcd Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Wed, 30 Apr 2025 15:14:46 +0200 Subject: [PATCH 06/11] remove utils.dictionary and utils.hydra in favor of versions from pie-core --- src/pie_modules/utils/__init__.py | 3 -- src/pie_modules/utils/dictionary.py | 3 -- src/pie_modules/utils/hydra.py | 2 -- tests/utils/test_hydra.py | 43 ----------------------------- 4 files changed, 51 deletions(-) delete mode 100644 src/pie_modules/utils/dictionary.py delete mode 100644 src/pie_modules/utils/hydra.py delete mode 100644 tests/utils/test_hydra.py diff --git 
a/src/pie_modules/utils/__init__.py b/src/pie_modules/utils/__init__.py index c8017711c..e69de29bb 100644 --- a/src/pie_modules/utils/__init__.py +++ b/src/pie_modules/utils/__init__.py @@ -1,3 +0,0 @@ -# backwards compatibility -from .dictionary import flatten_dict, list_of_dicts2dict_of_lists -from .hydra import resolve_type diff --git a/src/pie_modules/utils/dictionary.py b/src/pie_modules/utils/dictionary.py deleted file mode 100644 index 34cb20be0..000000000 --- a/src/pie_modules/utils/dictionary.py +++ /dev/null @@ -1,3 +0,0 @@ -# backwards compatibility -from pie_core.utils.dictionary import flatten_dict_s as flatten_dict -from pie_core.utils.dictionary import list_of_dicts2dict_of_lists diff --git a/src/pie_modules/utils/hydra.py b/src/pie_modules/utils/hydra.py deleted file mode 100644 index 21e55eeb9..000000000 --- a/src/pie_modules/utils/hydra.py +++ /dev/null @@ -1,2 +0,0 @@ -# backwards compatibility -from pie_core.utils.hydra import resolve_target, resolve_type diff --git a/tests/utils/test_hydra.py b/tests/utils/test_hydra.py deleted file mode 100644 index e0f6ddf82..000000000 --- a/tests/utils/test_hydra.py +++ /dev/null @@ -1,43 +0,0 @@ -import dataclasses - -import pytest -from pie_core import AnnotationLayer, annotation_field -from pie_core.utils.hydra import resolve_type - -from pie_modules.annotations import LabeledSpan, Span -from pie_modules.documents import TextBasedDocument - - -@dataclasses.dataclass -class TestDocumentWithEntities(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - - -@dataclasses.dataclass -class TestDocumentWithSentences(TextBasedDocument): - sentences: AnnotationLayer[Span] = annotation_field(target="text") - - -def test_resolve_document_type(): - assert resolve_type(TestDocumentWithEntities) == TestDocumentWithEntities - assert ( - resolve_type("tests.utils.test_hydra.TestDocumentWithEntities") == TestDocumentWithEntities - ) - with pytest.raises(TypeError) as exc_info: - 
resolve_type("tests.utils.test_hydra.test_resolve_document_type") - assert str(exc_info.value).startswith( - "type must be a subclass of None or a string that resolves to that, but got " - " Date: Fri, 30 May 2025 18:00:12 +0200 Subject: [PATCH 07/11] update poetry.lock --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index f07498fe5..b7618c92a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3412,4 +3412,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "5a228f0d856084e84519582d2fe026d5a808ff9c9144d9058a43ef6bf4c67714" +content-hash = "c95cdac43f10143aa8d2c695dc76c40400096001cc2c0f89f38c69fc0bdb67a0" From c4995b023de16c854d590233129e163e0e4fed0d Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Thu, 17 Jul 2025 17:17:41 +0200 Subject: [PATCH 08/11] fix merge remnant --- src/pie_modules/document/processing/sentence_splitter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pie_modules/document/processing/sentence_splitter.py b/src/pie_modules/document/processing/sentence_splitter.py index 14e1b0e7d..5230e2d19 100644 --- a/src/pie_modules/document/processing/sentence_splitter.py +++ b/src/pie_modules/document/processing/sentence_splitter.py @@ -46,7 +46,6 @@ def __init__( self.inplace = inplace # download the NLTK Punkt tokenizer model nltk.download("punkt") - nltk.download("punkt_tab") self.sentencizer = nltk.data.load(sentencizer_url) def __call__(self, document: D) -> D: From bf40a824bc943f7835c328d7f803f3d3cbe28578 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Thu, 17 Jul 2025 17:18:08 +0200 Subject: [PATCH 09/11] use pie-core 0.2.0 --- poetry.lock | 2792 +++++++++++++++++++++++------------------------- pyproject.toml | 4 +- 2 files changed, 1312 insertions(+), 1484 deletions(-) diff --git a/poetry.lock b/poetry.lock index b7618c92a..5fbd4d80e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,190 +1,51 @@ -# This file is automatically @generated by 
Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "accelerate" -version = "1.6.0" +version = "0.32.1" description = "Accelerate" optional = false -python-versions = ">=3.9.0" +python-versions = ">=3.8.0" +groups = ["dev"] files = [ - {file = "accelerate-1.6.0-py3-none-any.whl", hash = "sha256:1aee717d3d3735ad6d09710a7c26990ee4652b79b4e93df46551551b5227c2aa"}, - {file = "accelerate-1.6.0.tar.gz", hash = "sha256:28c1ef1846e690944f98b68dc7b8bb6c51d032d45e85dcbb3adb0c8b99dffb32"}, + {file = "accelerate-0.32.1-py3-none-any.whl", hash = "sha256:71fcf4be00872194071de561634268b71417d7f5b16b178e2fa76b6f117c52b0"}, + {file = "accelerate-0.32.1.tar.gz", hash = "sha256:3999acff0237cd0d4f9fd98b42d5a3163544777b53fc4f1eec886b77e992d177"}, ] [package.dependencies] -huggingface-hub = ">=0.21.0" -numpy = ">=1.17,<3.0.0" +huggingface-hub = "*" +numpy = ">=1.17,<2.0.0" packaging = ">=20.0" psutil = "*" pyyaml = "*" -safetensors = ">=0.4.3" -torch = ">=2.0.0" +safetensors = ">=0.3.1" +torch = ">=1.10.0" [package.extras] -deepspeed = ["deepspeed"] -dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "diffusers", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.11.2,<0.12.0)", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] -quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.11.2,<0.12.0)"] +deepspeed = ["deepspeed (<=0.14.0)"] +dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "diffusers", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.2.1,<0.3.0)", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", 
"ruff (>=0.2.1,<0.3.0)"] rich = ["rich"] sagemaker = ["sagemaker"] -test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] -test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist"] -test-trackers = ["comet-ml", "dvclive", "matplotlib", "mlflow", "tensorboard", "wandb"] -testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"] - -[[package]] -name = "aiohttp" -version = "3.9.5" -description = "Async http client/server framework (asyncio)" -optional = false -python-versions = ">=3.8" -files = [ - {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, - {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, - {file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"}, - {file = 
"aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"}, - {file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"}, - {file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"}, - {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"}, - {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"}, - {file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"}, - {file = 
"aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"}, - {file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"}, - {file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"}, - {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"}, - {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"}, - {file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"}, - {file = 
"aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"}, - {file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"}, - {file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"}, - {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"}, - {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"}, - {file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"}, - {file = "aiohttp-3.9.5-cp38-cp38-win32.whl", 
hash = "sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"}, - {file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"}, - {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"}, - {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"}, - {file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"}, - {file = 
"aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"}, - {file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = "sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"}, - {file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"}, - {file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"}, -] - -[package.dependencies] -aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} -attrs = ">=17.3.0" -frozenlist = ">=1.1.1" -multidict = ">=4.5,<7.0" -yarl = ">=1.0,<2.0" - -[package.extras] -speedups = ["Brotli", "aiodns", "brotlicffi"] - -[[package]] -name = "aiosignal" -version = "1.3.1" -description = "aiosignal: a list of registered asynchronous callbacks" -optional = false -python-versions = ">=3.7" -files = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] - -[package.dependencies] -frozenlist = ">=1.1.0" - -[[package]] -name = "async-timeout" -version = "4.0.3" -description = "Timeout context manager for asyncio programs" -optional = false -python-versions = ">=3.7" -files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] - -[[package]] -name = "attrs" -version = "23.2.0" -description = "Classes Without Boilerplate" -optional = false -python-versions 
= ">=3.7" -files = [ - {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, - {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, -] - -[package.extras] -cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] -dev = ["attrs[tests]", "pre-commit"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] -tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] -tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist"] +test-trackers = ["comet-ml", "dvclive", "tensorboard", "wandb"] +testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] [[package]] name = "beautifulsoup4" -version = "4.13.4" +version = "4.12.3" description = "Screen-scraping library" optional = false -python-versions = ">=3.7.0" +python-versions = ">=3.6.0" +groups = ["dev"] files = [ - {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, - {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, ] 
[package.dependencies] soupsieve = ">1.2" -typing-extensions = ">=4.0.0" [package.extras] cchardet = ["cchardet"] @@ -195,32 +56,34 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.38.3" +version = "1.34.146" description = "The AWS SDK for Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "boto3-1.38.3-py3-none-any.whl", hash = "sha256:9218f86e2164e1bddb75d435bbde4fa651aa58687213d7e3e1b50f7eb8868f66"}, - {file = "boto3-1.38.3.tar.gz", hash = "sha256:655d51abcd68a40a33c52dbaa2ca73fc63c746b894e2ae22ed8ddc1912ddd93f"}, + {file = "boto3-1.34.146-py3-none-any.whl", hash = "sha256:7ec568fb19bce82a70be51f08fddac1ef927ca3fb0896cbb34303a012ba228d8"}, + {file = "boto3-1.34.146.tar.gz", hash = "sha256:5686fe2a6d1aa1de8a88e9589cdcc33361640d3d7a13da718a30717248886124"}, ] [package.dependencies] -botocore = ">=1.38.3,<1.39.0" +botocore = ">=1.34.146,<1.35.0" jmespath = ">=0.7.1,<2.0.0" -s3transfer = ">=0.12.0,<0.13.0" +s3transfer = ">=0.10.0,<0.11.0" [package.extras] crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.38.3" +version = "1.34.146" description = "Low-level, data-driven core of boto 3." 
optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "botocore-1.38.3-py3-none-any.whl", hash = "sha256:96f823240fe3704b99c17d1d1b2fd2d1679cf56d2a55b095f00255b76087cbf0"}, - {file = "botocore-1.38.3.tar.gz", hash = "sha256:790f8f966201781f5fcf486d48b4492e9f734446bbf9d19ef8159d08be854243"}, + {file = "botocore-1.34.146-py3-none-any.whl", hash = "sha256:3fd4782362bd29c192704ebf859c5c8c5189ad05719e391eefe23088434427ae"}, + {file = "botocore-1.34.146.tar.gz", hash = "sha256:849cb8e54e042443aeabcd7822b5f2b76cb5cfe33fe3a71f91c7c069748a869c"}, ] [package.dependencies] @@ -232,17 +95,18 @@ urllib3 = [ ] [package.extras] -crt = ["awscrt (==0.23.8)"] +crt = ["awscrt (==0.20.11)"] [[package]] name = "bpemb" -version = "0.3.6" +version = "0.3.5" description = "Byte-pair embeddings in 275 languages" optional = false python-versions = "*" +groups = ["dev"] files = [ - {file = "bpemb-0.3.6-py3-none-any.whl", hash = "sha256:6eabc133bbd0a7dbeb52b2cfed55ca5cacbb38b236ebb1f504b279a2d835e8b7"}, - {file = "bpemb-0.3.6.tar.gz", hash = "sha256:a33fa1dcdfaf3d4cb3eaebac430b6f23a684a888e1761f5a026ce3868153ee2d"}, + {file = "bpemb-0.3.5-py3-none-any.whl", hash = "sha256:c8b5ba9255d93341073a2a3ab335463660ffd44b9bb96b86156215ada22b241a"}, + {file = "bpemb-0.3.5.tar.gz", hash = "sha256:6d1254d3ccdb9617f064c9ae5c995a7d0d621cf5db7d3cdc1fe3cb2000b2601b"}, ] [package.dependencies] @@ -258,6 +122,7 @@ version = "2024.2.2" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main", "dev"] files = [ {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, @@ -269,6 +134,7 @@ version = "3.4.0" description = "Validate configuration and produce human readable error messages." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, @@ -280,6 +146,7 @@ version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" +groups = ["main", "dev"] files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, @@ -375,13 +242,14 @@ files = [ [[package]] name = "click" -version = "8.1.8" +version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ - {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, - {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, ] [package.dependencies] @@ -393,20 +261,23 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "conllu" -version = "6.0.0" +version = "5.0.1" description = "CoNLL-U Parser parses a CoNLL-U formatted string into a nested python dictionary" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "conllu-6.0.0-py3-none-any.whl", hash = "sha256:c47206a0912f768bfae429d3d3c2c7f5ed068babd2502663e865cfb21532cbcc"}, - {file = "conllu-6.0.0.tar.gz", hash = "sha256:bc6072d49d00e77f4454039519118c0500fafa0d0eb509f53793081084f50aba"}, + {file = "conllu-5.0.1-py3-none-any.whl", hash = "sha256:19f46a8b61433e6ad7f06ffcafe72a18405821f8fda710f8579b95371a3cb3f9"}, + {file = "conllu-5.0.1.tar.gz", hash = "sha256:1ce417575853c58910068ab05323ccf98135255db2afa0ff154ff29628484b6b"}, ] [package.extras] @@ -414,87 +285,67 @@ test = ["tox"] [[package]] name = "contourpy" -version = "1.3.0" +version = "1.2.1" description = "Python library for calculating contours of 2D quadrilateral grids" optional = false python-versions = ">=3.9" -files = [ - {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"}, - {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f8557cbb07415a4d6fa191f20fd9d2d9eb9c0b61d1b2f52a8926e43c6e9af7"}, - {file = 
"contourpy-1.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36f965570cff02b874773c49bfe85562b47030805d7d8360748f3eca570f4cab"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cacd81e2d4b6f89c9f8a5b69b86490152ff39afc58a95af002a398273e5ce589"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69375194457ad0fad3a839b9e29aa0b0ed53bb54db1bfb6c3ae43d111c31ce41"}, - {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a52040312b1a858b5e31ef28c2e865376a386c60c0e248370bbea2d3f3b760d"}, - {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3faeb2998e4fcb256542e8a926d08da08977f7f5e62cf733f3c211c2a5586223"}, - {file = "contourpy-1.3.0-cp310-cp310-win32.whl", hash = "sha256:36e0cff201bcb17a0a8ecc7f454fe078437fa6bda730e695a92f2d9932bd507f"}, - {file = "contourpy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:87ddffef1dbe5e669b5c2440b643d3fdd8622a348fe1983fad7a0f0ccb1cd67b"}, - {file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"}, - {file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"}, - {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"}, - {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"}, - {file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"}, - {file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"}, - {file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"}, - {file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"}, - {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"}, - {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"}, - {file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = 
"sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"}, - {file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"}, - {file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"}, - {file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"}, - {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"}, - {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"}, - {file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"}, - {file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"}, - {file = "contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"}, - {file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"}, - {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"}, - {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"}, - {file = "contourpy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a11077e395f67ffc2c44ec2418cfebed032cd6da3022a94fc227b6faf8e2acb8"}, - {file = "contourpy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8134301d7e204c88ed7ab50028ba06c683000040ede1d617298611f9dc6240c"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e12968fdfd5bb45ffdf6192a590bd8ddd3ba9e58360b29683c6bb71a7b41edca"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd2a0fc506eccaaa7595b7e1418951f213cf8255be2600f1ea1b61e46a60c55f"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfb5c62ce023dfc410d6059c936dcf96442ba40814aefbfa575425a3a7f19dc"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:68a32389b06b82c2fdd68276148d7b9275b5f5cf13e5417e4252f6d1a34f72a2"}, - {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94e848a6b83da10898cbf1311a815f770acc9b6a3f2d646f330d57eb4e87592e"}, - {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d78ab28a03c854a873787a0a42254a0ccb3cb133c672f645c9f9c8f3ae9d0800"}, - {file = "contourpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:81cb5ed4952aae6014bc9d0421dec7c5835c9c8c31cdf51910b708f548cf58e5"}, - {file = "contourpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:14e262f67bd7e6eb6880bc564dcda30b15e351a594657e55b7eec94b6ef72843"}, - {file = "contourpy-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe41b41505a5a33aeaed2a613dccaeaa74e0e3ead6dd6fd3a118fb471644fd6c"}, - {file = "contourpy-1.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eca7e17a65f72a5133bdbec9ecf22401c62bcf4821361ef7811faee695799779"}, - {file = "contourpy-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ec4dc6bf570f5b22ed0d7efba0dfa9c5b9e0431aeea7581aa217542d9e809a4"}, - {file = "contourpy-1.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:00ccd0dbaad6d804ab259820fa7cb0b8036bda0686ef844d24125d8287178ce0"}, - {file = "contourpy-1.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca947601224119117f7c19c9cdf6b3ab54c5726ef1d906aa4a69dfb6dd58102"}, - {file = "contourpy-1.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6ec93afeb848a0845a18989da3beca3eec2c0f852322efe21af1931147d12cb"}, - {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"}, +groups = ["dev"] +files = [ + {file = "contourpy-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd7c23df857d488f418439686d3b10ae2fbf9bc256cd045b37a8c16575ea1040"}, + {file = "contourpy-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:5b9eb0ca724a241683c9685a484da9d35c872fd42756574a7cfbf58af26677fd"}, + {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c75507d0a55378240f781599c30e7776674dbaf883a46d1c90f37e563453480"}, + {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11959f0ce4a6f7b76ec578576a0b61a28bdc0696194b6347ba3f1c53827178b9"}, + {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb3315a8a236ee19b6df481fc5f997436e8ade24a9f03dfdc6bd490fea20c6da"}, + {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39f3ecaf76cd98e802f094e0d4fbc6dc9c45a8d0c4d185f0f6c2234e14e5f75b"}, + {file = "contourpy-1.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:94b34f32646ca0414237168d68a9157cb3889f06b096612afdd296003fdd32fd"}, + {file = "contourpy-1.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:457499c79fa84593f22454bbd27670227874cd2ff5d6c84e60575c8b50a69619"}, + {file = "contourpy-1.2.1-cp310-cp310-win32.whl", hash = "sha256:ac58bdee53cbeba2ecad824fa8159493f0bf3b8ea4e93feb06c9a465d6c87da8"}, + {file = "contourpy-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:9cffe0f850e89d7c0012a1fb8730f75edd4320a0a731ed0c183904fe6ecfc3a9"}, + {file = "contourpy-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6022cecf8f44e36af10bd9118ca71f371078b4c168b6e0fab43d4a889985dbb5"}, + {file = "contourpy-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef5adb9a3b1d0c645ff694f9bca7702ec2c70f4d734f9922ea34de02294fdf72"}, + {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6150ffa5c767bc6332df27157d95442c379b7dce3a38dff89c0f39b63275696f"}, + {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c863140fafc615c14a4bf4efd0f4425c02230eb8ef02784c9a156461e62c965"}, + {file = 
"contourpy-1.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:00e5388f71c1a0610e6fe56b5c44ab7ba14165cdd6d695429c5cd94021e390b2"}, + {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4492d82b3bc7fbb7e3610747b159869468079fe149ec5c4d771fa1f614a14df"}, + {file = "contourpy-1.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:49e70d111fee47284d9dd867c9bb9a7058a3c617274900780c43e38d90fe1205"}, + {file = "contourpy-1.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b59c0ffceff8d4d3996a45f2bb6f4c207f94684a96bf3d9728dbb77428dd8cb8"}, + {file = "contourpy-1.2.1-cp311-cp311-win32.whl", hash = "sha256:7b4182299f251060996af5249c286bae9361fa8c6a9cda5efc29fe8bfd6062ec"}, + {file = "contourpy-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2855c8b0b55958265e8b5888d6a615ba02883b225f2227461aa9127c578a4922"}, + {file = "contourpy-1.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:62828cada4a2b850dbef89c81f5a33741898b305db244904de418cc957ff05dc"}, + {file = "contourpy-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:309be79c0a354afff9ff7da4aaed7c3257e77edf6c1b448a779329431ee79d7e"}, + {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e785e0f2ef0d567099b9ff92cbfb958d71c2d5b9259981cd9bee81bd194c9a4"}, + {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cac0a8f71a041aa587410424ad46dfa6a11f6149ceb219ce7dd48f6b02b87a7"}, + {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af3f4485884750dddd9c25cb7e3915d83c2db92488b38ccb77dd594eac84c4a0"}, + {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ce6889abac9a42afd07a562c2d6d4b2b7134f83f18571d859b25624a331c90b"}, + {file = "contourpy-1.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:a1eea9aecf761c661d096d39ed9026574de8adb2ae1c5bd7b33558af884fb2ce"}, + {file = "contourpy-1.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:187fa1d4c6acc06adb0fae5544c59898ad781409e61a926ac7e84b8f276dcef4"}, + {file = "contourpy-1.2.1-cp312-cp312-win32.whl", hash = "sha256:c2528d60e398c7c4c799d56f907664673a807635b857df18f7ae64d3e6ce2d9f"}, + {file = "contourpy-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:1a07fc092a4088ee952ddae19a2b2a85757b923217b7eed584fdf25f53a6e7ce"}, + {file = "contourpy-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bb6834cbd983b19f06908b45bfc2dad6ac9479ae04abe923a275b5f48f1a186b"}, + {file = "contourpy-1.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1d59e739ab0e3520e62a26c60707cc3ab0365d2f8fecea74bfe4de72dc56388f"}, + {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd3db01f59fdcbce5b22afad19e390260d6d0222f35a1023d9adc5690a889364"}, + {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a12a813949e5066148712a0626895c26b2578874e4cc63160bb007e6df3436fe"}, + {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe0ccca550bb8e5abc22f530ec0466136379c01321fd94f30a22231e8a48d985"}, + {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1d59258c3c67c865435d8fbeb35f8c59b8bef3d6f46c1f29f6123556af28445"}, + {file = "contourpy-1.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f32c38afb74bd98ce26de7cc74a67b40afb7b05aae7b42924ea990d51e4dac02"}, + {file = "contourpy-1.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d31a63bc6e6d87f77d71e1abbd7387ab817a66733734883d1fc0021ed9bfa083"}, + {file = "contourpy-1.2.1-cp39-cp39-win32.whl", hash = "sha256:ddcb8581510311e13421b1f544403c16e901c4e8f09083c881fab2be80ee31ba"}, + {file = "contourpy-1.2.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:10a37ae557aabf2509c79715cd20b62e4c7c28b8cd62dd7d99e5ed3ce28c3fd9"}, + {file = "contourpy-1.2.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a31f94983fecbac95e58388210427d68cd30fe8a36927980fab9c20062645609"}, + {file = "contourpy-1.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef2b055471c0eb466033760a521efb9d8a32b99ab907fc8358481a1dd29e3bd3"}, + {file = "contourpy-1.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b33d2bc4f69caedcd0a275329eb2198f560b325605810895627be5d4b876bf7f"}, + {file = "contourpy-1.2.1.tar.gz", hash = "sha256:4d8908b3bee1c889e547867ca4cdc54e5ab6be6d3e078556814a22457f49423c"}, ] [package.dependencies] -numpy = ">=1.23" +numpy = ">=1.20" [package.extras] bokeh = ["bokeh", "selenium"] docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] -mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.8.0)", "types-Pillow"] test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] -test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] +test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" @@ -502,6 +353,7 @@ version = "7.5.3" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, @@ -561,7 +413,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cycler" @@ -569,6 +421,7 @@ version 
= "0.12.1" description = "Composable style cycles" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, @@ -580,20 +433,21 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "deprecated" -version = "1.2.18" +version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev"] files = [ - {file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"}, - {file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"}, + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, ] [package.dependencies] wrapt = ">=1.10,<2" [package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "distlib" @@ -601,6 +455,7 @@ version = "0.3.8" description = "Distribution utilities" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, @@ -612,6 +467,8 @@ version = "1.2.1" description = "Backport of PEP 654 (exception 
groups)" optional = false python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, @@ -626,6 +483,7 @@ version = "3.14.0" description = "A platform independent file lock." optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, @@ -634,7 +492,7 @@ files = [ [package.extras] docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] -typing = ["typing-extensions (>=4.8)"] +typing = ["typing-extensions (>=4.8) ; python_version < \"3.11\""] [[package]] name = "flair" @@ -642,6 +500,7 @@ version = "0.13.1" description = "A very simple framework for state-of-the-art NLP" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "flair-0.13.1-py3-none-any.whl", hash = "sha256:d38a01bbaf6c708059a3a23f10487e1d2dabb3171fc262fac35d95215821c2cc"}, {file = "flair-0.13.1.tar.gz", hash = "sha256:34f6e99180a2d4b9f24783e6225be0fe8d7ab2d1c6c59dbc1cec4b4eb03ba12d"}, @@ -680,162 +539,69 @@ wikipedia-api = ">=0.5.7" [[package]] name = "fonttools" -version = "4.57.0" +version = "4.53.1" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" -files = [ - {file = "fonttools-4.57.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:babe8d1eb059a53e560e7bf29f8e8f4accc8b6cfb9b5fd10e485bde77e71ef41"}, - {file = "fonttools-4.57.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:81aa97669cd726349eb7bd43ca540cf418b279ee3caba5e2e295fb4e8f841c02"}, - {file = "fonttools-4.57.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0e9618630edd1910ad4f07f60d77c184b2f572c8ee43305ea3265675cbbfe7e"}, - {file = "fonttools-4.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34687a5d21f1d688d7d8d416cb4c5b9c87fca8a1797ec0d74b9fdebfa55c09ab"}, - {file = "fonttools-4.57.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69ab81b66ebaa8d430ba56c7a5f9abe0183afefd3a2d6e483060343398b13fb1"}, - {file = "fonttools-4.57.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d639397de852f2ccfb3134b152c741406752640a266d9c1365b0f23d7b88077f"}, - {file = "fonttools-4.57.0-cp310-cp310-win32.whl", hash = "sha256:cc066cb98b912f525ae901a24cd381a656f024f76203bc85f78fcc9e66ae5aec"}, - {file = "fonttools-4.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7a64edd3ff6a7f711a15bd70b4458611fb240176ec11ad8845ccbab4fe6745db"}, - {file = "fonttools-4.57.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3871349303bdec958360eedb619169a779956503ffb4543bb3e6211e09b647c4"}, - {file = "fonttools-4.57.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c59375e85126b15a90fcba3443eaac58f3073ba091f02410eaa286da9ad80ed8"}, - {file = "fonttools-4.57.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967b65232e104f4b0f6370a62eb33089e00024f2ce143aecbf9755649421c683"}, - {file = "fonttools-4.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39acf68abdfc74e19de7485f8f7396fa4d2418efea239b7061d6ed6a2510c746"}, - {file = "fonttools-4.57.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d077f909f2343daf4495ba22bb0e23b62886e8ec7c109ee8234bdbd678cf344"}, - {file = 
"fonttools-4.57.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:46370ac47a1e91895d40e9ad48effbe8e9d9db1a4b80888095bc00e7beaa042f"}, - {file = "fonttools-4.57.0-cp311-cp311-win32.whl", hash = "sha256:ca2aed95855506b7ae94e8f1f6217b7673c929e4f4f1217bcaa236253055cb36"}, - {file = "fonttools-4.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:17168a4670bbe3775f3f3f72d23ee786bd965395381dfbb70111e25e81505b9d"}, - {file = "fonttools-4.57.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:889e45e976c74abc7256d3064aa7c1295aa283c6bb19810b9f8b604dfe5c7f31"}, - {file = "fonttools-4.57.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0425c2e052a5f1516c94e5855dbda706ae5a768631e9fcc34e57d074d1b65b92"}, - {file = "fonttools-4.57.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44c26a311be2ac130f40a96769264809d3b0cb297518669db437d1cc82974888"}, - {file = "fonttools-4.57.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84c41ba992df5b8d680b89fd84c6a1f2aca2b9f1ae8a67400c8930cd4ea115f6"}, - {file = "fonttools-4.57.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ea1e9e43ca56b0c12440a7c689b1350066595bebcaa83baad05b8b2675129d98"}, - {file = "fonttools-4.57.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:84fd56c78d431606332a0627c16e2a63d243d0d8b05521257d77c6529abe14d8"}, - {file = "fonttools-4.57.0-cp312-cp312-win32.whl", hash = "sha256:f4376819c1c778d59e0a31db5dc6ede854e9edf28bbfa5b756604727f7f800ac"}, - {file = "fonttools-4.57.0-cp312-cp312-win_amd64.whl", hash = "sha256:57e30241524879ea10cdf79c737037221f77cc126a8cdc8ff2c94d4a522504b9"}, - {file = "fonttools-4.57.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:408ce299696012d503b714778d89aa476f032414ae57e57b42e4b92363e0b8ef"}, - {file = "fonttools-4.57.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bbceffc80aa02d9e8b99f2a7491ed8c4a783b2fc4020119dc405ca14fb5c758c"}, - {file = 
"fonttools-4.57.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f022601f3ee9e1f6658ed6d184ce27fa5216cee5b82d279e0f0bde5deebece72"}, - {file = "fonttools-4.57.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dea5893b58d4637ffa925536462ba626f8a1b9ffbe2f5c272cdf2c6ebadb817"}, - {file = "fonttools-4.57.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dff02c5c8423a657c550b48231d0a48d7e2b2e131088e55983cfe74ccc2c7cc9"}, - {file = "fonttools-4.57.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:767604f244dc17c68d3e2dbf98e038d11a18abc078f2d0f84b6c24571d9c0b13"}, - {file = "fonttools-4.57.0-cp313-cp313-win32.whl", hash = "sha256:8e2e12d0d862f43d51e5afb8b9751c77e6bec7d2dc00aad80641364e9df5b199"}, - {file = "fonttools-4.57.0-cp313-cp313-win_amd64.whl", hash = "sha256:f1d6bc9c23356908db712d282acb3eebd4ae5ec6d8b696aa40342b1d84f8e9e3"}, - {file = "fonttools-4.57.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9d57b4e23ebbe985125d3f0cabbf286efa191ab60bbadb9326091050d88e8213"}, - {file = "fonttools-4.57.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:579ba873d7f2a96f78b2e11028f7472146ae181cae0e4d814a37a09e93d5c5cc"}, - {file = "fonttools-4.57.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e3e1ec10c29bae0ea826b61f265ec5c858c5ba2ce2e69a71a62f285cf8e4595"}, - {file = "fonttools-4.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1968f2a2003c97c4ce6308dc2498d5fd4364ad309900930aa5a503c9851aec8"}, - {file = "fonttools-4.57.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:aff40f8ac6763d05c2c8f6d240c6dac4bb92640a86d9b0c3f3fff4404f34095c"}, - {file = "fonttools-4.57.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d07f1b64008e39fceae7aa99e38df8385d7d24a474a8c9872645c4397b674481"}, - {file = "fonttools-4.57.0-cp38-cp38-win32.whl", hash = 
"sha256:51d8482e96b28fb28aa8e50b5706f3cee06de85cbe2dce80dbd1917ae22ec5a6"}, - {file = "fonttools-4.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:03290e818782e7edb159474144fca11e36a8ed6663d1fcbd5268eb550594fd8e"}, - {file = "fonttools-4.57.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7339e6a3283e4b0ade99cade51e97cde3d54cd6d1c3744459e886b66d630c8b3"}, - {file = "fonttools-4.57.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:05efceb2cb5f6ec92a4180fcb7a64aa8d3385fd49cfbbe459350229d1974f0b1"}, - {file = "fonttools-4.57.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a97bb05eb24637714a04dee85bdf0ad1941df64fe3b802ee4ac1c284a5f97b7c"}, - {file = "fonttools-4.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:541cb48191a19ceb1a2a4b90c1fcebd22a1ff7491010d3cf840dd3a68aebd654"}, - {file = "fonttools-4.57.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:cdef9a056c222d0479a1fdb721430f9efd68268014c54e8166133d2643cb05d9"}, - {file = "fonttools-4.57.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3cf97236b192a50a4bf200dc5ba405aa78d4f537a2c6e4c624bb60466d5b03bd"}, - {file = "fonttools-4.57.0-cp39-cp39-win32.whl", hash = "sha256:e952c684274a7714b3160f57ec1d78309f955c6335c04433f07d36c5eb27b1f9"}, - {file = "fonttools-4.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2a722c0e4bfd9966a11ff55c895c817158fcce1b2b6700205a376403b546ad9"}, - {file = "fonttools-4.57.0-py3-none-any.whl", hash = "sha256:3122c604a675513c68bd24c6a8f9091f1c2376d18e8f5fe5a101746c81b3e98f"}, - {file = "fonttools-4.57.0.tar.gz", hash = "sha256:727ece10e065be2f9dd239d15dd5d60a66e17eac11aea47d447f9f03fdbc42de"}, +groups = ["dev"] +files = [ + {file = "fonttools-4.53.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0679a30b59d74b6242909945429dbddb08496935b82f91ea9bf6ad240ec23397"}, + {file = "fonttools-4.53.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8bf06b94694251861ba7fdeea15c8ec0967f84c3d4143ae9daf42bbc7717fe3"}, + 
{file = "fonttools-4.53.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b96cd370a61f4d083c9c0053bf634279b094308d52fdc2dd9a22d8372fdd590d"}, + {file = "fonttools-4.53.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1c7c5aa18dd3b17995898b4a9b5929d69ef6ae2af5b96d585ff4005033d82f0"}, + {file = "fonttools-4.53.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e013aae589c1c12505da64a7d8d023e584987e51e62006e1bb30d72f26522c41"}, + {file = "fonttools-4.53.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9efd176f874cb6402e607e4cc9b4a9cd584d82fc34a4b0c811970b32ba62501f"}, + {file = "fonttools-4.53.1-cp310-cp310-win32.whl", hash = "sha256:c8696544c964500aa9439efb6761947393b70b17ef4e82d73277413f291260a4"}, + {file = "fonttools-4.53.1-cp310-cp310-win_amd64.whl", hash = "sha256:8959a59de5af6d2bec27489e98ef25a397cfa1774b375d5787509c06659b3671"}, + {file = "fonttools-4.53.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:da33440b1413bad53a8674393c5d29ce64d8c1a15ef8a77c642ffd900d07bfe1"}, + {file = "fonttools-4.53.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ff7e5e9bad94e3a70c5cd2fa27f20b9bb9385e10cddab567b85ce5d306ea923"}, + {file = "fonttools-4.53.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6e7170d675d12eac12ad1a981d90f118c06cf680b42a2d74c6c931e54b50719"}, + {file = "fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee32ea8765e859670c4447b0817514ca79054463b6b79784b08a8df3a4d78e3"}, + {file = "fonttools-4.53.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6e08f572625a1ee682115223eabebc4c6a2035a6917eac6f60350aba297ccadb"}, + {file = "fonttools-4.53.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b21952c092ffd827504de7e66b62aba26fdb5f9d1e435c52477e6486e9d128b2"}, + {file = "fonttools-4.53.1-cp311-cp311-win32.whl", hash = "sha256:9dfdae43b7996af46ff9da520998a32b105c7f098aeea06b2226b30e74fbba88"}, 
+ {file = "fonttools-4.53.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4d0096cb1ac7a77b3b41cd78c9b6bc4a400550e21dc7a92f2b5ab53ed74eb02"}, + {file = "fonttools-4.53.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d92d3c2a1b39631a6131c2fa25b5406855f97969b068e7e08413325bc0afba58"}, + {file = "fonttools-4.53.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3b3c8ebafbee8d9002bd8f1195d09ed2bd9ff134ddec37ee8f6a6375e6a4f0e8"}, + {file = "fonttools-4.53.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32f029c095ad66c425b0ee85553d0dc326d45d7059dbc227330fc29b43e8ba60"}, + {file = "fonttools-4.53.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f5e6c3510b79ea27bb1ebfcc67048cde9ec67afa87c7dd7efa5c700491ac7f"}, + {file = "fonttools-4.53.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f677ce218976496a587ab17140da141557beb91d2a5c1a14212c994093f2eae2"}, + {file = "fonttools-4.53.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9e6ceba2a01b448e36754983d376064730690401da1dd104ddb543519470a15f"}, + {file = "fonttools-4.53.1-cp312-cp312-win32.whl", hash = "sha256:791b31ebbc05197d7aa096bbc7bd76d591f05905d2fd908bf103af4488e60670"}, + {file = "fonttools-4.53.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ed170b5e17da0264b9f6fae86073be3db15fa1bd74061c8331022bca6d09bab"}, + {file = "fonttools-4.53.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c818c058404eb2bba05e728d38049438afd649e3c409796723dfc17cd3f08749"}, + {file = "fonttools-4.53.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:651390c3b26b0c7d1f4407cad281ee7a5a85a31a110cbac5269de72a51551ba2"}, + {file = "fonttools-4.53.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e54f1bba2f655924c1138bbc7fa91abd61f45c68bd65ab5ed985942712864bbb"}, + {file = "fonttools-4.53.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c9cd19cf4fe0595ebdd1d4915882b9440c3a6d30b008f3cc7587c1da7b95be5f"}, + {file = "fonttools-4.53.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2af40ae9cdcb204fc1d8f26b190aa16534fcd4f0df756268df674a270eab575d"}, + {file = "fonttools-4.53.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:35250099b0cfb32d799fb5d6c651220a642fe2e3c7d2560490e6f1d3f9ae9169"}, + {file = "fonttools-4.53.1-cp38-cp38-win32.whl", hash = "sha256:f08df60fbd8d289152079a65da4e66a447efc1d5d5a4d3f299cdd39e3b2e4a7d"}, + {file = "fonttools-4.53.1-cp38-cp38-win_amd64.whl", hash = "sha256:7b6b35e52ddc8fb0db562133894e6ef5b4e54e1283dff606fda3eed938c36fc8"}, + {file = "fonttools-4.53.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75a157d8d26c06e64ace9df037ee93a4938a4606a38cb7ffaf6635e60e253b7a"}, + {file = "fonttools-4.53.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4824c198f714ab5559c5be10fd1adf876712aa7989882a4ec887bf1ef3e00e31"}, + {file = "fonttools-4.53.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:becc5d7cb89c7b7afa8321b6bb3dbee0eec2b57855c90b3e9bf5fb816671fa7c"}, + {file = "fonttools-4.53.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84ec3fb43befb54be490147b4a922b5314e16372a643004f182babee9f9c3407"}, + {file = "fonttools-4.53.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:73379d3ffdeecb376640cd8ed03e9d2d0e568c9d1a4e9b16504a834ebadc2dfb"}, + {file = "fonttools-4.53.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:02569e9a810f9d11f4ae82c391ebc6fb5730d95a0657d24d754ed7763fb2d122"}, + {file = "fonttools-4.53.1-cp39-cp39-win32.whl", hash = "sha256:aae7bd54187e8bf7fd69f8ab87b2885253d3575163ad4d669a262fe97f0136cb"}, + {file = "fonttools-4.53.1-cp39-cp39-win_amd64.whl", hash = "sha256:e5b708073ea3d684235648786f5f6153a48dc8762cdfe5563c57e80787c29fbb"}, + {file = "fonttools-4.53.1-py3-none-any.whl", hash = "sha256:f1f8758a2ad110bd6432203a344269f445a2907dc24ef6bccfd0ac4e14e0d71d"}, + {file = 
"fonttools-4.53.1.tar.gz", hash = "sha256:e128778a8e9bc11159ce5447f76766cefbd876f44bd79aff030287254e4752c4"}, ] [package.extras] -all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres", "pycairo", "scipy"] +interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] lxml = ["lxml (>=4.0)"] pathops = ["skia-pathops (>=0.5.0)"] plot = ["matplotlib"] repacker = ["uharfbuzz (>=0.23.0)"] symfont = ["sympy"] -type1 = ["xattr"] +type1 = ["xattr ; sys_platform == \"darwin\""] ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.1.0)"] -woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] - -[[package]] -name = "frozenlist" -version = "1.4.1" -description = "A list-like structure which implements collections.abc.MutableSequence" -optional = false -python-versions = ">=3.8" -files = [ - {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, - {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, - {file = 
"frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"}, - {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"}, - {file = 
"frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"}, - {file = 
"frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"}, - {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"}, - {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"}, - {file = 
"frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"}, - {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"}, - {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"}, - {file = 
"frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"}, - {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"}, - {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"}, - {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"}, - {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"}, - {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"}, - {file = 
"frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, -] +unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] +woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] [[package]] name = "fsspec" @@ -843,15 +609,12 @@ version = "2023.6.0" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, ] -[package.dependencies] -aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} -requests = {version = "*", optional = true, markers = "extra == \"http\""} - [package.extras] abfs = ["adlfs"] adl = ["adlfs"] @@ -878,17 +641,18 @@ tqdm = ["tqdm"] [[package]] name = "ftfy" -version = "6.3.1" +version = "6.2.0" description = "Fixes mojibake and other problems with Unicode, after the fact" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8,<4" +groups = ["dev"] files = [ - {file = "ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083"}, - {file = "ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec"}, + {file = "ftfy-6.2.0-py3-none-any.whl", hash = "sha256:f94a2c34b76e07475720e3096f5ca80911d152406fbde66fdb45c4d0c9150026"}, + {file = "ftfy-6.2.0.tar.gz", hash = "sha256:5e42143c7025ef97944ca2619d6b61b0619fc6654f98771d39e862c1424c75c0"}, ] [package.dependencies] -wcwidth = "*" +wcwidth = ">=0.2.12,<0.3.0" [[package]] name = "gdown" @@ -896,6 +660,7 @@ version = "5.2.0" description = "Google Drive Public File/Folder 
Downloader" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "gdown-5.2.0-py3-none-any.whl", hash = "sha256:33083832d82b1101bdd0e9df3edd0fbc0e1c5f14c9d8c38d2a35bf1683b526d6"}, {file = "gdown-5.2.0.tar.gz", hash = "sha256:2145165062d85520a3cd98b356c9ed522c5e7984d408535409fd46f94defc787"}, @@ -916,6 +681,7 @@ version = "4.3.3" description = "Python framework for fast Vector Space Modelling" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "gensim-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4e72840adfbea35c5804fd559bc0cb6bc9f439926220a37d852b7ce76eb325c1"}, {file = "gensim-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4019263c9d9afae7c669f880c17e09461e77a71afce04ed4d79cf71a4cad2848"}, @@ -962,6 +728,7 @@ version = "0.25.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["main", "dev"] files = [ {file = "huggingface_hub-0.25.2-py3-none-any.whl", hash = "sha256:1897caf88ce7f97fe0110603d8f66ac264e3ba6accdf30cd66cc0fed5282ad25"}, {file = "huggingface_hub-0.25.2.tar.gz", hash = "sha256:a1014ea111a5f40ccd23f7f7ba8ac46e20fa3b658ced1f86a00c75c06ec6423c"}, @@ -996,6 +763,7 @@ version = "2.5.36" description = "File identification library for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, @@ -1010,6 +778,7 @@ version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" +groups = ["main", "dev"] files = [ {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, {file = 
"idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, @@ -1017,25 +786,23 @@ files = [ [[package]] name = "importlib-resources" -version = "6.5.2" +version = "6.4.0" description = "Read resources from Python packages" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version == \"3.9\"" files = [ - {file = "importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec"}, - {file = "importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c"}, + {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, + {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, ] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] -test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"] -type = ["pytest-mypy"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy ; platform_python_implementation != \"PyPy\"", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] [[package]] name = "iniconfig" @@ -1043,31 +810,19 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = 
"iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "intel-openmp" -version = "2021.4.0" -description = "Intel OpenMP* Runtime Library" -optional = false -python-versions = "*" -files = [ - {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:e2240ab8d01472fed04f3544a878cda5da16c26232b7ea1b59132dbfb48b186e"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f"}, -] - [[package]] name = "janome" version = "0.5.0" description = "Japanese morphological analysis engine." optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "Janome-0.5.0-py2.py3-none-any.whl", hash = "sha256:d098670394a77881ce2f6b7d696c0ea5ff74c0c8cf74a8a882159ec82c0e6dc7"}, {file = "Janome-0.5.0.tar.gz", hash = "sha256:ce4a3ed7a4635c2f80139639327d5b1e0381858ad74a3c4a61e8cc83f820400e"}, @@ -1079,6 +834,7 @@ version = "3.1.4" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, @@ -1096,6 +852,7 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -1107,6 +864,7 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -1114,125 +872,116 @@ files = [ [[package]] name = "kiwisolver" -version = "1.4.7" +version = "1.4.5" description = "A fast implementation of the Cassowary constraint solver" optional = false -python-versions = ">=3.8" -files = [ - {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, - {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, - {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, - {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, - {file = 
"kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, - {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, - {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, - {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, - {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, - {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, - {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, - {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, - {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, - {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, - {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, - {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, - {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, - 
{file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, - {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, - {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, - {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, - {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, - {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, - {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, - {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, - {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, - {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, - {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, - {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, - {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = 
"sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, - {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, - {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, - {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, - {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, - {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, - {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, - {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, - {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, - {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, - {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, - {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, - {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, - 
{file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, - {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, - {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, - {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, - {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, - {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, - {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, - {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, - {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, - {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, - {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, - {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, - {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, - {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, - {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, - {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, - {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, - {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, - {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, - {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, - {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, - {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, - {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, - {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, - {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, - {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, - {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, - {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, - {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, - {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, - {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, - {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, - {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, - {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, - {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, - {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, - {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, - {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, - {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, - {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, - {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, - {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, - {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, - {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, - {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, - {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, - {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, - {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, - {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, - {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, - {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, - {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash 
= "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, - {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, - {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, - {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, - {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, - {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, - {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, - {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, - {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, - {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, - {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, - {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, - {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, - {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, - {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, - {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, - {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, - {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, - {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, - {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, - {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ef7afcd2d281494c0a9101d5c571970708ad911d028137cd558f02b851c08b4"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:9eaa8b117dc8337728e834b9c6e2611f10c79e38f65157c4c38e9400286f5cb1"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec20916e7b4cbfb1f12380e46486ec4bcbaa91a9c448b97023fde0d5bbf9e4ff"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b42c68602539407884cf70d6a480a469b93b81b7701378ba5e2328660c847a"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa12042de0171fad672b6c59df69106d20d5596e4f87b5e8f76df757a7c399aa"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a40773c71d7ccdd3798f6489aaac9eee213d566850a9533f8d26332d626b82c"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:19df6e621f6d8b4b9c4d45f40a66839294ff2bb235e64d2178f7522d9170ac5b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:83d78376d0d4fd884e2c114d0621624b73d2aba4e2788182d286309ebdeed770"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e391b1f0a8a5a10ab3b9bb6afcfd74f2175f24f8975fb87ecae700d1503cdee0"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:852542f9481f4a62dbb5dd99e8ab7aedfeb8fb6342349a181d4036877410f525"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59edc41b24031bc25108e210c0def6f6c2191210492a972d585a06ff246bb79b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win32.whl", hash = "sha256:a6aa6315319a052b4ee378aa171959c898a6183f15c1e541821c5c59beaa0238"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win_amd64.whl", hash = "sha256:d0ef46024e6a3d79c01ff13801cb19d0cad7fd859b15037aec74315540acc276"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:11863aa14a51fd6ec28688d76f1735f8f69ab1fabf388851a595d0721af042f5"}, + {file = 
"kiwisolver-1.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ab3919a9997ab7ef2fbbed0cc99bb28d3c13e6d4b1ad36e97e482558a91be90"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fcc700eadbbccbf6bc1bcb9dbe0786b4b1cb91ca0dcda336eef5c2beed37b797"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfdd7c0b105af050eb3d64997809dc21da247cf44e63dc73ff0fd20b96be55a9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76c6a5964640638cdeaa0c359382e5703e9293030fe730018ca06bc2010c4437"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbea0db94288e29afcc4c28afbf3a7ccaf2d7e027489c449cf7e8f83c6346eb9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ceec1a6bc6cab1d6ff5d06592a91a692f90ec7505d6463a88a52cc0eb58545da"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040c1aebeda72197ef477a906782b5ab0d387642e93bda547336b8957c61022e"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f91de7223d4c7b793867797bacd1ee53bfe7359bd70d27b7b58a04efbb9436c8"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:faae4860798c31530dd184046a900e652c95513796ef51a12bc086710c2eec4d"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0157420efcb803e71d1b28e2c287518b8808b7cf1ab8af36718fd0a2c453eb0"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:06f54715b7737c2fecdbf140d1afb11a33d59508a47bf11bb38ecf21dc9ab79f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fdb7adb641a0d13bdcd4ef48e062363d8a9ad4a182ac7647ec88f695e719ae9f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win32.whl", hash = 
"sha256:bb86433b1cfe686da83ce32a9d3a8dd308e85c76b60896d58f082136f10bffac"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c08e1312a9cf1074d17b17728d3dfce2a5125b2d791527f33ffbe805200a355"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:32d5cf40c4f7c7b3ca500f8985eb3fb3a7dfc023215e876f207956b5ea26632a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f846c260f483d1fd217fe5ed7c173fb109efa6b1fc8381c8b7552c5781756192"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5ff5cf3571589b6d13bfbfd6bcd7a3f659e42f96b5fd1c4830c4cf21d4f5ef45"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7269d9e5f1084a653d575c7ec012ff57f0c042258bf5db0954bf551c158466e7"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da802a19d6e15dffe4b0c24b38b3af68e6c1a68e6e1d8f30148c83864f3881db"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3aba7311af82e335dd1e36ffff68aaca609ca6290c2cb6d821a39aa075d8e3ff"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763773d53f07244148ccac5b084da5adb90bfaee39c197554f01b286cf869228"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2270953c0d8cdab5d422bee7d2007f043473f9d2999631c86a223c9db56cbd16"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d099e745a512f7e3bbe7249ca835f4d357c586d78d79ae8f1dcd4d8adeb9bda9"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:74db36e14a7d1ce0986fa104f7d5637aea5c82ca6326ed0ec5694280942d1162"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7e5bab140c309cb3a6ce373a9e71eb7e4873c70c2dda01df6820474f9889d6d4"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0f114aa76dc1b8f636d077979c0ac22e7cd8f3493abbab152f20eb8d3cda71f3"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:88a2df29d4724b9237fc0c6eaf2a1adae0cdc0b3e9f4d8e7dc54b16812d2d81a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win32.whl", hash = "sha256:72d40b33e834371fd330fb1472ca19d9b8327acb79a5821d4008391db8e29f20"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:2c5674c4e74d939b9d91dda0fae10597ac7521768fec9e399c70a1f27e2ea2d9"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3a2b053a0ab7a3960c98725cfb0bf5b48ba82f64ec95fe06f1d06c99b552e130"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd32d6c13807e5c66a7cbb79f90b553642f296ae4518a60d8d76243b0ad2898"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59ec7b7c7e1a61061850d53aaf8e93db63dce0c936db1fda2658b70e4a1be709"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da4cfb373035def307905d05041c1d06d8936452fe89d464743ae7fb8371078b"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2400873bccc260b6ae184b2b8a4fec0e4082d30648eadb7c3d9a13405d861e89"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1b04139c4236a0f3aff534479b58f6f849a8b351e1314826c2d230849ed48985"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:4e66e81a5779b65ac21764c295087de82235597a2293d18d943f8e9e32746265"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7931d8f1f67c4be9ba1dd9c451fb0eeca1a25b89e4d3f89e828fe12a519b782a"}, + {file = 
"kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:b3f7e75f3015df442238cca659f8baa5f42ce2a8582727981cbfa15fee0ee205"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:bbf1d63eef84b2e8c89011b7f2235b1e0bf7dacc11cac9431fc6468e99ac77fb"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4c380469bd3f970ef677bf2bcba2b6b0b4d5c75e7a020fb863ef75084efad66f"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-win32.whl", hash = "sha256:9408acf3270c4b6baad483865191e3e582b638b1654a007c62e3efe96f09a9a3"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-win_amd64.whl", hash = "sha256:5b94529f9b2591b7af5f3e0e730a4e0a41ea174af35a4fd067775f9bdfeee01a"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:11c7de8f692fc99816e8ac50d1d1aef4f75126eefc33ac79aac02c099fd3db71"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:53abb58632235cd154176ced1ae8f0d29a6657aa1aa9decf50b899b755bc2b93"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:88b9f257ca61b838b6f8094a62418421f87ac2a1069f7e896c36a7d86b5d4c29"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3195782b26fc03aa9c6913d5bad5aeb864bdc372924c093b0f1cebad603dd712"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc579bf0f502e54926519451b920e875f433aceb4624a3646b3252b5caa9e0b6"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a580c91d686376f0f7c295357595c5a026e6cbc3d77b7c36e290201e7c11ecb"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cfe6ab8da05c01ba6fbea630377b5da2cd9bcbc6338510116b01c1bc939a2c18"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d2e5a98f0ec99beb3c10e13b387f8db39106d53993f498b295f0c914328b1333"}, + {file = 
"kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a51a263952b1429e429ff236d2f5a21c5125437861baeed77f5e1cc2d2c7c6da"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3edd2fa14e68c9be82c5b16689e8d63d89fe927e56debd6e1dbce7a26a17f81b"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:74d1b44c6cfc897df648cc9fdaa09bc3e7679926e6f96df05775d4fb3946571c"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76d9289ed3f7501012e05abb8358bbb129149dbd173f1f57a1bf1c22d19ab7cc"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:92dea1ffe3714fa8eb6a314d2b3c773208d865a0e0d35e713ec54eea08a66250"}, + {file = "kiwisolver-1.4.5-cp38-cp38-win32.whl", hash = "sha256:5c90ae8c8d32e472be041e76f9d2f2dbff4d0b0be8bd4041770eddb18cf49a4e"}, + {file = "kiwisolver-1.4.5-cp38-cp38-win_amd64.whl", hash = "sha256:c7940c1dc63eb37a67721b10d703247552416f719c4188c54e04334321351ced"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9407b6a5f0d675e8a827ad8742e1d6b49d9c1a1da5d952a67d50ef5f4170b18d"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15568384086b6df3c65353820a4473575dbad192e35010f622c6ce3eebd57af9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0dc9db8e79f0036e8173c466d21ef18e1befc02de8bf8aa8dc0813a6dc8a7046"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cdc8a402aaee9a798b50d8b827d7ecf75edc5fb35ea0f91f213ff927c15f4ff0"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:955e8513d07a283056b1396e9a57ceddbd272d9252c14f154d450d227606eb54"}, + {file = 
"kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:346f5343b9e3f00b8db8ba359350eb124b98c99efd0b408728ac6ebf38173958"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9098e0049e88c6a24ff64545cdfc50807818ba6c1b739cae221bbbcbc58aad3"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:00bd361b903dc4bbf4eb165f24d1acbee754fce22ded24c3d56eec268658a5cf"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7b8b454bac16428b22560d0a1cf0a09875339cab69df61d7805bf48919415901"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f1d072c2eb0ad60d4c183f3fb44ac6f73fb7a8f16a2694a91f988275cbf352f9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:31a82d498054cac9f6d0b53d02bb85811185bcb477d4b60144f915f3b3126342"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6512cb89e334e4700febbffaaa52761b65b4f5a3cf33f960213d5656cea36a77"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win32.whl", hash = "sha256:9db8ea4c388fdb0f780fe91346fd438657ea602d58348753d9fb265ce1bca67f"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win_amd64.whl", hash = "sha256:59415f46a37f7f2efeec758353dd2eae1b07640d8ca0f0c42548ec4125492635"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5c7b3b3a728dc6faf3fc372ef24f21d1e3cee2ac3e9596691d746e5a536de920"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:620ced262a86244e2be10a676b646f29c34537d0d9cc8eb26c08f53d98013390"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:378a214a1e3bbf5ac4a8708304318b4f890da88c9e6a07699c4ae7174c09a68d"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:aaf7be1207676ac608a50cd08f102f6742dbfc70e8d60c4db1c6897f62f71523"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ba55dce0a9b8ff59495ddd050a0225d58bd0983d09f87cfe2b6aec4f2c1234e4"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd32ea360bcbb92d28933fc05ed09bffcb1704ba3fc7942e81db0fd4f81a7892"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7139af55d1688f8b960ee9ad5adafc4ac17c1c473fe07133ac092310d76544"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dced8146011d2bc2e883f9bd68618b8247387f4bbec46d7392b3c3b032640126"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9bf3325c47b11b2e51bca0824ea217c7cd84491d8ac4eefd1e409705ef092bd"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5794cf59533bc3f1b1c821f7206a3617999db9fbefc345360aafe2e067514929"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e368f200bbc2e4f905b8e71eb38b3c04333bddaa6a2464a6355487b02bb7fb09"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d706eba36b4c4d5bc6c6377bb6568098765e990cfc21ee16d13963fab7b3e7"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85267bd1aa8880a9c88a8cb71e18d3d64d2751a790e6ca6c27b8ccc724bcd5ad"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210ef2c3a1f03272649aff1ef992df2e724748918c4bc2d5a90352849eb40bea"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:11d011a7574eb3b82bcc9c1a1d35c1d7075677fdd15de527d91b46bd35e935ee"}, + {file = "kiwisolver-1.4.5.tar.gz", hash = 
"sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, ] [[package]] @@ -1241,6 +990,7 @@ version = "1.0.9" description = "Language detection library ported from Google's language-detection." optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, @@ -1249,171 +999,164 @@ files = [ [package.dependencies] six = "*" -[[package]] -name = "lightning-utilities" -version = "0.11.2" -description = "Lightning toolbox for across the our ecosystem." -optional = false -python-versions = ">=3.8" -files = [ - {file = "lightning-utilities-0.11.2.tar.gz", hash = "sha256:adf4cf9c5d912fe505db4729e51d1369c6927f3a8ac55a9dff895ce5c0da08d9"}, - {file = "lightning_utilities-0.11.2-py3-none-any.whl", hash = "sha256:541f471ed94e18a28d72879338c8c52e873bb46f4c47644d89228faeb6751159"}, -] - -[package.dependencies] -packaging = ">=17.1" -setuptools = "*" -typing-extensions = "*" - -[package.extras] -cli = ["fire"] -docs = ["requests (>=2.0.0)"] -typing = ["mypy (>=1.0.0)", "types-setuptools"] - [[package]] name = "lxml" -version = "5.4.0" +version = "5.2.2" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
optional = false python-versions = ">=3.6" -files = [ - {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c"}, - {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:696ea9e87442467819ac22394ca36cb3d01848dad1be6fac3fb612d3bd5a12cf"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef80aeac414f33c24b3815ecd560cee272786c3adfa5f31316d8b349bfade28"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b9c2754cef6963f3408ab381ea55f47dabc6f78f4b8ebb0f0b25cf1ac1f7609"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a62cc23d754bb449d63ff35334acc9f5c02e6dae830d78dab4dd12b78a524f4"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f82125bc7203c5ae8633a7d5d20bcfdff0ba33e436e4ab0abc026a53a8960b7"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b67319b4aef1a6c56576ff544b67a2a6fbd7eaee485b241cabf53115e8908b8f"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:a8ef956fce64c8551221f395ba21d0724fed6b9b6242ca4f2f7beb4ce2f41997"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:0a01ce7d8479dce84fc03324e3b0c9c90b1ece9a9bb6a1b6c9025e7e4520e78c"}, - {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91505d3ddebf268bb1588eb0f63821f738d20e1e7f05d3c647a5ca900288760b"}, - {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a3bcdde35d82ff385f4ede021df801b5c4a5bcdfb61ea87caabcebfc4945dc1b"}, - {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", 
hash = "sha256:aea7c06667b987787c7d1f5e1dfcd70419b711cdb47d6b4bb4ad4b76777a0563"}, - {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:a7fb111eef4d05909b82152721a59c1b14d0f365e2be4c742a473c5d7372f4f5"}, - {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:43d549b876ce64aa18b2328faff70f5877f8c6dede415f80a2f799d31644d776"}, - {file = "lxml-5.4.0-cp310-cp310-win32.whl", hash = "sha256:75133890e40d229d6c5837b0312abbe5bac1c342452cf0e12523477cd3aa21e7"}, - {file = "lxml-5.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:de5b4e1088523e2b6f730d0509a9a813355b7f5659d70eb4f319c76beea2e250"}, - {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9"}, - {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", 
hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8"}, - {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86"}, - {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056"}, - {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7"}, - {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd"}, - {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751"}, - {file = "lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4"}, - {file = "lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539"}, - {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4"}, - {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7"}, - {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079"}, - {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20"}, - {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8"}, - {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f"}, - {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc"}, - {file = "lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f"}, - {file = "lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2"}, - {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0"}, - {file = 
"lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8"}, - {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982"}, - {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61"}, - {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54"}, - {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b"}, - {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a"}, - {file = "lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82"}, - {file = "lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f"}, - {file = "lxml-5.4.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7be701c24e7f843e6788353c055d806e8bd8466b52907bafe5d13ec6a6dbaecd"}, - {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb54f7c6bafaa808f27166569b1511fc42701a7713858dddc08afdde9746849e"}, - {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97dac543661e84a284502e0cf8a67b5c711b0ad5fb661d1bd505c02f8cf716d7"}, - {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:c70e93fba207106cb16bf852e421c37bbded92acd5964390aad07cb50d60f5cf"}, - {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9c886b481aefdf818ad44846145f6eaf373a20d200b5ce1a5c8e1bc2d8745410"}, - {file = "lxml-5.4.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:fa0e294046de09acd6146be0ed6727d1f42ded4ce3ea1e9a19c11b6774eea27c"}, - {file = "lxml-5.4.0-cp36-cp36m-win32.whl", hash = "sha256:61c7bbf432f09ee44b1ccaa24896d21075e533cd01477966a5ff5a71d88b2f56"}, - {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, - {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, - {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, - {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, - {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, - {file = "lxml-5.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eaf24066ad0b30917186420d51e2e3edf4b0e2ea68d8cd885b14dc8afdcf6556"}, - {file = "lxml-5.4.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b31a3a77501d86d8ade128abb01082724c0dfd9524f542f2f07d693c9f1175f"}, - {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e108352e203c7afd0eb91d782582f00a0b16a948d204d4dec8565024fafeea5"}, - {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11a96c3b3f7551c8a8109aa65e8594e551d5a84c76bf950da33d0fb6dfafab7"}, - {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:ca755eebf0d9e62d6cb013f1261e510317a41bf4650f22963474a663fdfe02aa"}, - {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:4cd915c0fb1bed47b5e6d6edd424ac25856252f09120e3e8ba5154b6b921860e"}, - {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:226046e386556a45ebc787871d6d2467b32c37ce76c2680f5c608e25823ffc84"}, - {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:b108134b9667bcd71236c5a02aad5ddd073e372fb5d48ea74853e009fe38acb6"}, - {file = "lxml-5.4.0-cp38-cp38-win32.whl", hash = "sha256:1320091caa89805df7dcb9e908add28166113dcd062590668514dbd510798c88"}, - {file = "lxml-5.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:073eb6dcdf1f587d9b88c8c93528b57eccda40209cf9be549d469b942b41d70b"}, - {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bda3ea44c39eb74e2488297bb39d47186ed01342f0022c8ff407c250ac3f498e"}, - {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9ceaf423b50ecfc23ca00b7f50b64baba85fb3fb91c53e2c9d00bc86150c7e40"}, - {file = "lxml-5.4.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:664cdc733bc87449fe781dbb1f309090966c11cc0c0cd7b84af956a02a8a4729"}, - {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67ed8a40665b84d161bae3181aa2763beea3747f748bca5874b4af4d75998f87"}, - {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b4a3bd174cc9cdaa1afbc4620c049038b441d6ba07629d89a83b408e54c35cd"}, - {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b0989737a3ba6cf2a16efb857fb0dfa20bc5c542737fddb6d893fde48be45433"}, - {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:dc0af80267edc68adf85f2a5d9be1cdf062f973db6790c1d065e45025fa26140"}, - {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:639978bccb04c42677db43c79bdaa23785dc7f9b83bfd87570da8207872f1ce5"}, - {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a99d86351f9c15e4a901fc56404b485b1462039db59288b203f8c629260a142"}, - {file = "lxml-5.4.0-cp39-cp39-win32.whl", hash = "sha256:3e6d5557989cdc3ebb5302bbdc42b439733a841891762ded9514e74f60319ad6"}, - {file = "lxml-5.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8c9b7f16b63e65bbba889acb436a1034a82d34fa09752d754f88d708eca80e1"}, - {file = 
"lxml-5.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1b717b00a71b901b4667226bba282dd462c42ccf618ade12f9ba3674e1fabc55"}, - {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27a9ded0f0b52098ff89dd4c418325b987feed2ea5cc86e8860b0f844285d740"}, - {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7ce10634113651d6f383aa712a194179dcd496bd8c41e191cec2099fa09de5"}, - {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53370c26500d22b45182f98847243efb518d268374a9570409d2e2276232fd37"}, - {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6364038c519dffdbe07e3cf42e6a7f8b90c275d4d1617a69bb59734c1a2d571"}, - {file = "lxml-5.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b12cb6527599808ada9eb2cd6e0e7d3d8f13fe7bbb01c6311255a15ded4c7ab4"}, - {file = "lxml-5.4.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5f11a1526ebd0dee85e7b1e39e39a0cc0d9d03fb527f56d8457f6df48a10dc0c"}, - {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48b4afaf38bf79109bb060d9016fad014a9a48fb244e11b94f74ae366a64d252"}, - {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de6f6bb8a7840c7bf216fb83eec4e2f79f7325eca8858167b68708b929ab2172"}, - {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5cca36a194a4eb4e2ed6be36923d3cffd03dcdf477515dea687185506583d4c9"}, - {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b7c86884ad23d61b025989d99bfdd92a7351de956e01c61307cb87035960bcb1"}, - {file = "lxml-5.4.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:53d9469ab5460402c19553b56c3648746774ecd0681b1b27ea74d5d8a3ef5590"}, - {file = "lxml-5.4.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:56dbdbab0551532bb26c19c914848d7251d73edb507c3079d6805fa8bba5b706"}, - {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14479c2ad1cb08b62bb941ba8e0e05938524ee3c3114644df905d2331c76cd57"}, - {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32697d2ea994e0db19c1df9e40275ffe84973e4232b5c274f47e7c1ec9763cdd"}, - {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:24f6df5f24fc3385f622c0c9d63fe34604893bc1a5bdbb2dbf5870f85f9a404a"}, - {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:151d6c40bc9db11e960619d2bf2ec5829f0aaffb10b41dcf6ad2ce0f3c0b2325"}, - {file = "lxml-5.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4025bf2884ac4370a3243c5aa8d66d3cb9e15d3ddd0af2d796eccc5f0244390e"}, - {file = "lxml-5.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9459e6892f59ecea2e2584ee1058f5d8f629446eab52ba2305ae13a32a059530"}, - {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47fb24cc0f052f0576ea382872b3fc7e1f7e3028e53299ea751839418ade92a6"}, - {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50441c9de951a153c698b9b99992e806b71c1f36d14b154592580ff4a9d0d877"}, - {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ab339536aa798b1e17750733663d272038bf28069761d5be57cb4a9b0137b4f8"}, - {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9776af1aad5a4b4a1317242ee2bea51da54b2a7b7b48674be736d463c999f37d"}, - {file = "lxml-5.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:63e7968ff83da2eb6fdda967483a7a023aa497d85ad8f05c3ad9b1f2e8c84987"}, - {file = "lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd"}, +groups = ["dev"] +files = [ + {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_universal2.whl", 
hash = "sha256:364d03207f3e603922d0d3932ef363d55bbf48e3647395765f9bfcbdf6d23632"}, + {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50127c186f191b8917ea2fb8b206fbebe87fd414a6084d15568c27d0a21d60db"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4f025ef3db1c6da4460dd27c118d8cd136d0391da4e387a15e48e5c975147"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981a06a3076997adf7c743dcd0d7a0415582661e2517c7d961493572e909aa1d"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aef5474d913d3b05e613906ba4090433c515e13ea49c837aca18bde190853dff"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e275ea572389e41e8b039ac076a46cb87ee6b8542df3fff26f5baab43713bca"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5b65529bb2f21ac7861a0e94fdbf5dc0daab41497d18223b46ee8515e5ad297"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bcc98f911f10278d1daf14b87d65325851a1d29153caaf146877ec37031d5f36"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:b47633251727c8fe279f34025844b3b3a3e40cd1b198356d003aa146258d13a2"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:fbc9d316552f9ef7bba39f4edfad4a734d3d6f93341232a9dddadec4f15d425f"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:13e69be35391ce72712184f69000cda04fc89689429179bc4c0ae5f0b7a8c21b"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3b6a30a9ab040b3f545b697cb3adbf3696c05a3a68aad172e3fd7ca73ab3c835"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a233bb68625a85126ac9f1fc66d24337d6e8a0f9207b688eec2e7c880f012ec0"}, + {file = 
"lxml-5.2.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dfa7c241073d8f2b8e8dbc7803c434f57dbb83ae2a3d7892dd068d99e96efe2c"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a7aca7964ac4bb07680d5c9d63b9d7028cace3e2d43175cb50bba8c5ad33316"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae4073a60ab98529ab8a72ebf429f2a8cc612619a8c04e08bed27450d52103c0"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ffb2be176fed4457e445fe540617f0252a72a8bc56208fd65a690fdb1f57660b"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e290d79a4107d7d794634ce3e985b9ae4f920380a813717adf61804904dc4393"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96e85aa09274955bb6bd483eaf5b12abadade01010478154b0ec70284c1b1526"}, + {file = "lxml-5.2.2-cp310-cp310-win32.whl", hash = "sha256:f956196ef61369f1685d14dad80611488d8dc1ef00be57c0c5a03064005b0f30"}, + {file = "lxml-5.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:875a3f90d7eb5c5d77e529080d95140eacb3c6d13ad5b616ee8095447b1d22e7"}, + {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:45f9494613160d0405682f9eee781c7e6d1bf45f819654eb249f8f46a2c22545"}, + {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0b3f2df149efb242cee2ffdeb6674b7f30d23c9a7af26595099afaf46ef4e88"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d28cb356f119a437cc58a13f8135ab8a4c8ece18159eb9194b0d269ec4e28083"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657a972f46bbefdbba2d4f14413c0d079f9ae243bd68193cb5061b9732fa54c1"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b9ea10063efb77a965a8d5f4182806fbf59ed068b3c3fd6f30d2ac7bee734"}, + {file = 
"lxml-5.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07542787f86112d46d07d4f3c4e7c760282011b354d012dc4141cc12a68cef5f"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:303f540ad2dddd35b92415b74b900c749ec2010e703ab3bfd6660979d01fd4ed"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2eb2227ce1ff998faf0cd7fe85bbf086aa41dfc5af3b1d80867ecfe75fb68df3"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:1d8a701774dfc42a2f0b8ccdfe7dbc140500d1049e0632a611985d943fcf12df"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:56793b7a1a091a7c286b5f4aa1fe4ae5d1446fe742d00cdf2ffb1077865db10d"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eb00b549b13bd6d884c863554566095bf6fa9c3cecb2e7b399c4bc7904cb33b5"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a2569a1f15ae6c8c64108a2cd2b4a858fc1e13d25846be0666fc144715e32ab"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:8cf85a6e40ff1f37fe0f25719aadf443686b1ac7652593dc53c7ef9b8492b115"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d237ba6664b8e60fd90b8549a149a74fcc675272e0e95539a00522e4ca688b04"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0b3f5016e00ae7630a4b83d0868fca1e3d494c78a75b1c7252606a3a1c5fc2ad"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:23441e2b5339bc54dc949e9e675fa35efe858108404ef9aa92f0456929ef6fe8"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb0ba3e8566548d6c8e7dd82a8229ff47bd8fb8c2da237607ac8e5a1b8312e5"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:79d1fb9252e7e2cfe4de6e9a6610c7cbb99b9708e2c3e29057f487de5a9eaefa"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:6dcc3d17eac1df7859ae01202e9bb11ffa8c98949dcbeb1069c8b9a75917e01b"}, + {file = "lxml-5.2.2-cp311-cp311-win32.whl", hash = "sha256:4c30a2f83677876465f44c018830f608fa3c6a8a466eb223535035fbc16f3438"}, + {file = "lxml-5.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:49095a38eb333aaf44c06052fd2ec3b8f23e19747ca7ec6f6c954ffea6dbf7be"}, + {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7429e7faa1a60cad26ae4227f4dd0459efde239e494c7312624ce228e04f6391"}, + {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:50ccb5d355961c0f12f6cf24b7187dbabd5433f29e15147a67995474f27d1776"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc911208b18842a3a57266d8e51fc3cfaccee90a5351b92079beed912a7914c2"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ce9e786753743159799fdf8e92a5da351158c4bfb6f2db0bf31e7892a1feb5"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec87c44f619380878bd49ca109669c9f221d9ae6883a5bcb3616785fa8f94c97"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08ea0f606808354eb8f2dfaac095963cb25d9d28e27edcc375d7b30ab01abbf6"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75a9632f1d4f698b2e6e2e1ada40e71f369b15d69baddb8968dcc8e683839b18"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74da9f97daec6928567b48c90ea2c82a106b2d500f397eeb8941e47d30b1ca85"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:0969e92af09c5687d769731e3f39ed62427cc72176cebb54b7a9d52cc4fa3b73"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:9164361769b6ca7769079f4d426a41df6164879f7f3568be9086e15baca61466"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = 
"sha256:d26a618ae1766279f2660aca0081b2220aca6bd1aa06b2cf73f07383faf48927"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab67ed772c584b7ef2379797bf14b82df9aa5f7438c5b9a09624dd834c1c1aaf"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3d1e35572a56941b32c239774d7e9ad724074d37f90c7a7d499ab98761bd80cf"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:8268cbcd48c5375f46e000adb1390572c98879eb4f77910c6053d25cc3ac2c67"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e282aedd63c639c07c3857097fc0e236f984ceb4089a8b284da1c526491e3f3d"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfdc2bfe69e9adf0df4915949c22a25b39d175d599bf98e7ddf620a13678585"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4aefd911793b5d2d7a921233a54c90329bf3d4a6817dc465f12ffdfe4fc7b8fe"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8b8df03a9e995b6211dafa63b32f9d405881518ff1ddd775db4e7b98fb545e1c"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f11ae142f3a322d44513de1018b50f474f8f736bc3cd91d969f464b5bfef8836"}, + {file = "lxml-5.2.2-cp312-cp312-win32.whl", hash = "sha256:16a8326e51fcdffc886294c1e70b11ddccec836516a343f9ed0f82aac043c24a"}, + {file = "lxml-5.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:bbc4b80af581e18568ff07f6395c02114d05f4865c2812a1f02f2eaecf0bfd48"}, + {file = "lxml-5.2.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e3d9d13603410b72787579769469af730c38f2f25505573a5888a94b62b920f8"}, + {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38b67afb0a06b8575948641c1d6d68e41b83a3abeae2ca9eed2ac59892b36706"}, + {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c689d0d5381f56de7bd6966a4541bff6e08bf8d3871bbd89a0c6ab18aa699573"}, + 
{file = "lxml-5.2.2-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:cf2a978c795b54c539f47964ec05e35c05bd045db5ca1e8366988c7f2fe6b3ce"}, + {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:739e36ef7412b2bd940f75b278749106e6d025e40027c0b94a17ef7968d55d56"}, + {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d8bbcd21769594dbba9c37d3c819e2d5847656ca99c747ddb31ac1701d0c0ed9"}, + {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:2304d3c93f2258ccf2cf7a6ba8c761d76ef84948d87bf9664e14d203da2cd264"}, + {file = "lxml-5.2.2-cp36-cp36m-win32.whl", hash = "sha256:02437fb7308386867c8b7b0e5bc4cd4b04548b1c5d089ffb8e7b31009b961dc3"}, + {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, + {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, + {file = 
"lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, + {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, + {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, + {file = "lxml-5.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7ed07b3062b055d7a7f9d6557a251cc655eed0b3152b76de619516621c56f5d3"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f60fdd125d85bf9c279ffb8e94c78c51b3b6a37711464e1f5f31078b45002421"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7e24cb69ee5f32e003f50e016d5fde438010c1022c96738b04fc2423e61706"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23cfafd56887eaed93d07bc4547abd5e09d837a002b791e9767765492a75883f"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19b4e485cd07b7d83e3fe3b72132e7df70bfac22b14fe4bf7a23822c3a35bff5"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:7ce7ad8abebe737ad6143d9d3bf94b88b93365ea30a5b81f6877ec9c0dee0a48"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e49b052b768bb74f58c7dda4e0bdf7b79d43a9204ca584ffe1fb48a6f3c84c66"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d14a0d029a4e176795cef99c056d58067c06195e0c7e2dbb293bf95c08f772a3"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:be49ad33819d7dcc28a309b86d4ed98e1a65f3075c6acd3cd4fe32103235222b"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a6d17e0370d2516d5bb9062c7b4cb731cff921fc875644c3d751ad857ba9c5b1"}, + {file = "lxml-5.2.2-cp38-cp38-win32.whl", hash = "sha256:5b8c041b6265e08eac8a724b74b655404070b636a8dd6d7a13c3adc07882ef30"}, + {file = "lxml-5.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:f61efaf4bed1cc0860e567d2ecb2363974d414f7f1f124b1df368bbf183453a6"}, + {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb91819461b1b56d06fa4bcf86617fac795f6a99d12239fb0c68dbeba41a0a30"}, + {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d4ed0c7cbecde7194cd3228c044e86bf73e30a23505af852857c09c24e77ec5d"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54401c77a63cc7d6dc4b4e173bb484f28a5607f3df71484709fe037c92d4f0ed"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:625e3ef310e7fa3a761d48ca7ea1f9d8718a32b1542e727d584d82f4453d5eeb"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:519895c99c815a1a24a926d5b60627ce5ea48e9f639a5cd328bda0515ea0f10c"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7079d5eb1c1315a858bbf180000757db8ad904a89476653232db835c3114001"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:343ab62e9ca78094f2306aefed67dcfad61c4683f87eee48ff2fd74902447726"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:cd9e78285da6c9ba2d5c769628f43ef66d96ac3085e59b10ad4f3707980710d3"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:546cf886f6242dff9ec206331209db9c8e1643ae642dea5fdbecae2453cb50fd"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_s390x.whl", hash = 
"sha256:02f6a8eb6512fdc2fd4ca10a49c341c4e109aa6e9448cc4859af5b949622715a"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:339ee4a4704bc724757cd5dd9dc8cf4d00980f5d3e6e06d5847c1b594ace68ab"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0a028b61a2e357ace98b1615fc03f76eb517cc028993964fe08ad514b1e8892d"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f90e552ecbad426eab352e7b2933091f2be77115bb16f09f78404861c8322981"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:d83e2d94b69bf31ead2fa45f0acdef0757fa0458a129734f59f67f3d2eb7ef32"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a02d3c48f9bb1e10c7788d92c0c7db6f2002d024ab6e74d6f45ae33e3d0288a3"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d68ce8e7b2075390e8ac1e1d3a99e8b6372c694bbe612632606d1d546794207"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:453d037e09a5176d92ec0fd282e934ed26d806331a8b70ab431a81e2fbabf56d"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:3b019d4ee84b683342af793b56bb35034bd749e4cbdd3d33f7d1107790f8c472"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb3942960f0beb9f46e2a71a3aca220d1ca32feb5a398656be934320804c0df9"}, + {file = "lxml-5.2.2-cp39-cp39-win32.whl", hash = "sha256:ac6540c9fff6e3813d29d0403ee7a81897f1d8ecc09a8ff84d2eea70ede1cdbf"}, + {file = "lxml-5.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:610b5c77428a50269f38a534057444c249976433f40f53e3b47e68349cca1425"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b537bd04d7ccd7c6350cdaaaad911f6312cbd61e6e6045542f781c7f8b2e99d2"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4820c02195d6dfb7b8508ff276752f6b2ff8b64ae5d13ebe02e7667e035000b9"}, + {file = 
"lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a09f6184f17a80897172863a655467da2b11151ec98ba8d7af89f17bf63dae"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76acba4c66c47d27c8365e7c10b3d8016a7da83d3191d053a58382311a8bf4e1"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b128092c927eaf485928cec0c28f6b8bead277e28acf56800e972aa2c2abd7a2"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ae791f6bd43305aade8c0e22f816b34f3b72b6c820477aab4d18473a37e8090b"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a2f6a1bc2460e643785a2cde17293bd7a8f990884b822f7bca47bee0a82fc66b"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e8d351ff44c1638cb6e980623d517abd9f580d2e53bfcd18d8941c052a5a009"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec4bd9133420c5c52d562469c754f27c5c9e36ee06abc169612c959bd7dbb07"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:55ce6b6d803890bd3cc89975fca9de1dff39729b43b73cb15ddd933b8bc20484"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ab6a358d1286498d80fe67bd3d69fcbc7d1359b45b41e74c4a26964ca99c3f8"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:06668e39e1f3c065349c51ac27ae430719d7806c026fec462e5693b08b95696b"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9cd5323344d8ebb9fb5e96da5de5ad4ebab993bbf51674259dbe9d7a18049525"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89feb82ca055af0fe797a2323ec9043b26bc371365847dbe83c7fd2e2f181c34"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e481bba1e11ba585fb06db666bfc23dbe181dbafc7b25776156120bf12e0d5a6"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d6c6ea6a11ca0ff9cd0390b885984ed31157c168565702959c25e2191674a14"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3d98de734abee23e61f6b8c2e08a88453ada7d6486dc7cdc82922a03968928db"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:69ab77a1373f1e7563e0fb5a29a8440367dec051da6c7405333699d07444f511"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e17913c431f5ae01d8658dbf792fdc457073dcdfbb31dc0cc6ab256e664a8d"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f8757b03208c3f50097761be2dea0aba02e94f0dc7023ed73a7bb14ff11eb0"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a520b4f9974b0a0a6ed73c2154de57cdfd0c8800f4f15ab2b73238ffed0b36e"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5e097646944b66207023bc3c634827de858aebc226d5d4d6d16f0b77566ea182"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b5e4ef22ff25bfd4ede5f8fb30f7b24446345f3e79d9b7455aef2836437bc38a"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff69a9a0b4b17d78170c73abe2ab12084bdf1691550c5629ad1fe7849433f324"}, + {file = "lxml-5.2.2.tar.gz", hash = "sha256:bb2dc4898180bea79863d5487e5f9c7c34297414bad54bcd0f0852aee9cfdb87"}, ] [package.extras] cssselect = ["cssselect (>=0.7)"] -html-clean = ["lxml_html_clean"] +html-clean = ["lxml-html-clean"] html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] -source = ["Cython (>=3.0.11,<3.1.0)"] +source = ["Cython (>=3.0.10)"] [[package]] name = "markupsafe" @@ -1421,6 +1164,7 @@ version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1490,6 +1234,7 @@ version = "3.9.4" description = "Python plotting package" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "matplotlib-3.9.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:c5fdd7abfb706dfa8d307af64a87f1a862879ec3cd8d0ec8637458f0885b9c50"}, {file = "matplotlib-3.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d89bc4e85e40a71d1477780366c27fb7c6494d293e1617788986f74e2a03d7ff"}, @@ -1549,33 +1294,16 @@ python-dateutil = ">=2.7" [package.extras] dev = ["meson-python (>=0.13.1,<0.17.0)", "numpy (>=1.25)", "pybind11 (>=2.6,!=2.13.3)", "setuptools (>=64)", "setuptools_scm (>=7)"] -[[package]] -name = "mkl" -version = "2021.4.0" -description = "IntelĀ® oneAPI Math Kernel Library" -optional = false -python-versions = "*" -files = [ - {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"}, - {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"}, - {file = "mkl-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:398dbf2b0d12acaf54117a5210e8f191827f373d362d796091d161f610c1ebfb"}, - {file = "mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8"}, - {file = "mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718"}, -] - -[package.dependencies] -intel-openmp = "==2021.*" -tbb = "==2021.*" - [[package]] name = 
"more-itertools" -version = "10.7.0" +version = "10.3.0" description = "More routines for operating on iterables, beyond itertools" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "more_itertools-10.7.0-py3-none-any.whl", hash = "sha256:d43980384673cb07d2f7d2d918c616b30c659c089ee23953f601d6609c67510e"}, - {file = "more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3"}, + {file = "more-itertools-10.3.0.tar.gz", hash = "sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463"}, + {file = "more_itertools-10.3.0-py3-none-any.whl", hash = "sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320"}, ] [[package]] @@ -1584,6 +1312,7 @@ version = "0.5.10" description = "D3 Viewer for Matplotlib" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "mpld3-0.5.10-py3-none-any.whl", hash = "sha256:80877acce87ea447380fad7374668737505c8c0684aab05238e7c5dc1fab38c1"}, {file = "mpld3-0.5.10.tar.gz", hash = "sha256:a478eb404fa5212505c59133cf272cd9a94105872e605597720e7f84de38fbc7"}, @@ -1599,6 +1328,7 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" +groups = ["main", "dev"] files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1607,114 +1337,16 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] -[[package]] -name = "multidict" -version = "6.0.5" -description = "multidict implementation" -optional = false -python-versions = 
">=3.7" -files = [ - {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, - {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, - {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, - {file = 
"multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, - {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, - {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, - {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, - {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, - {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, - {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, - {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, - {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, - {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, - {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, - {file = 
"multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, - {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, - {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, - {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, - {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, - {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, - {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, - {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, - {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, - {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, - {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, - {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, - {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, - {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, - {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, - {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, - {file = 
"multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, - {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, -] - [[package]] name = "networkx" version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, @@ -1729,13 +1361,14 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] name = "nltk" -version = "3.9.1" +version = "3.8.1" description = "Natural Language Toolkit" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" +groups = ["dev"] files = [ - {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, - {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, ] [package.dependencies] @@ -1758,6 +1391,7 @@ version = "1.9.0" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] files = [ {file = "nodeenv-1.9.0-py2.py3-none-any.whl", hash = "sha256:508ecec98f9f3330b636d4448c0f1a56fc68017c68f1e7857ebc52acf0eb879a"}, {file = "nodeenv-1.9.0.tar.gz", hash = "sha256:07f144e90dae547bf0d4ee8da0ee42664a42a04e02ed68e06324348dafe4bdb1"}, @@ -1769,6 +1403,7 @@ version = "1.26.4" description = "Fundamental 
package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -1808,12 +1443,235 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.6.4.1" +description = "CUBLAS native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-win_amd64.whl", hash = "sha256:9e4fa264f4d8a4eb0cdbd34beadc029f453b3bafae02401e999cf3d5a5af75f8"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.6.80" +description = "CUDA profiling tools runtime libs." 
+optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-win_amd64.whl", hash = "sha256:bbe6ae76e83ce5251b56e8c8e61a964f757175682bbad058b170b136266ab00a"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.6.77" +description = "NVRTC native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:f7007dbd914c56bd80ea31bc43e8e149da38f68158f423ba845fc3292684e45a"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.6.77" +description = "CUDA Runtime native Libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine 
== \"x86_64\"" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:86c58044c824bf3c173c49a2dbc7a6c8b53cb4e4dca50068be0bf64e9dab3f7f"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.5.1.17" +description = "cuDNN runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-win_amd64.whl", hash = "sha256:d7af0f8a4f3b4b9dbb3122f2ef553b45694ed9c384d5a75bab197b8eefb79ab8"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.0.4" +description = "CUFFT native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = 
"nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-win_amd64.whl", hash = "sha256:6048ebddfb90d09d2707efb1fd78d4e3a77cb3ae4dc60e19aab6be0ece2ae464"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.11.1.6" +description = "cuFile GPUDirect libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, + {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.7.77" +description = "CURAND native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, + {file = 
"nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-win_amd64.whl", hash = "sha256:6d6d935ffba0f3d439b7cd968192ff068fafd9018dbf1b85b37261b13cfc9905"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.1.2" +description = "CUDA solver native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-win_amd64.whl", hash = "sha256:6813f9d8073f555444a8705f3ab0296d3e1cb37a16d694c5fc8b862a0d8706d7"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" +nvidia-cusparse-cu12 = "*" +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.4.2" +description = "CUSPARSE native runtime libraries" +optional = false 
+python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-win_amd64.whl", hash = "sha256:4acb8c08855a26d737398cba8fb6f8f5045d93f82612b4cfd84645a2332ccf20"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.6.3" +description = "NVIDIA cuSPARSELt" +optional = false +python-versions = "*" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7"}, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.26.2" +description = "NVIDIA Collective Communication Library (NCCL) Runtime" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == 
\"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, + {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.6.85" +description = "Nvidia JIT LTO Library" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.6.77" +description = "NVIDIA Tools Extension" +optional = false +python-versions = ">=3" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2"}, + {file = 
"nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"}, +] + [[package]] name = "packaging" version = "24.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, @@ -1821,53 +1679,41 @@ files = [ [[package]] name = "pandas" -version = "2.2.3" +version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -files = [ - {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, - {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, - {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, - {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, - {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, - {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, - {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = 
"sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, - {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, - {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, - {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, - {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, - {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, - {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, - {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, - {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, - {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, - {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, - {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, - {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, - {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, - {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, - {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, - {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, - {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, - {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, - {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, - {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, - {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, - {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, - {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, - {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, - {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, - {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, - {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, - {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, - {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, - {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, - {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, - {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, - {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, - {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, - {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +groups = ["main"] +files = [ + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, ] [package.dependencies] @@ -1907,116 +1753,116 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pie-core" -version = "0.1.2" +version = "0.2.0" description = "Core modules 
of PyTorch-IE" optional = false python-versions = "<4.0,>=3.9" +groups = ["main"] files = [ - {file = "pie_core-0.1.2-py3-none-any.whl", hash = "sha256:2118e7e0d7002182e3c071e7806eece73ce7d68789cf6a0b8a3a718cf3eb910d"}, - {file = "pie_core-0.1.2.tar.gz", hash = "sha256:08c883a2e7618f0bfdeaa428a51a95753cbf3a9b12eb5cca1ab3263d98177303"}, + {file = "pie_core-0.2.0-py3-none-any.whl", hash = "sha256:8bb9740304c9b58246485c9170717d2b947d732d71d2a9d5176f8572d8b98b2a"}, + {file = "pie_core-0.2.0.tar.gz", hash = "sha256:7e41cbe6b554285acbcae8822d84d97c19820ddd7b27dc9751349eb48bc61eb1"}, ] [package.dependencies] huggingface_hub = ">=0.23.4,<0.26.0" -pytorch-lightning = ">=2.1.0,<3.0.0" +torch = ">=2.1.0,<3.0.0" [[package]] name = "pillow" -version = "11.2.1" +version = "10.4.0" description = "Python Imaging Library (Fork)" optional = false -python-versions = ">=3.9" -files = [ - {file = "pillow-11.2.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:d57a75d53922fc20c165016a20d9c44f73305e67c351bbc60d1adaf662e74047"}, - {file = "pillow-11.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:127bf6ac4a5b58b3d32fc8289656f77f80567d65660bc46f72c0d77e6600cc95"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4ba4be812c7a40280629e55ae0b14a0aafa150dd6451297562e1764808bbe61"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8bd62331e5032bc396a93609982a9ab6b411c05078a52f5fe3cc59234a3abd1"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:562d11134c97a62fe3af29581f083033179f7ff435f78392565a1ad2d1c2c45c"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c97209e85b5be259994eb5b69ff50c5d20cca0f458ef9abd835e262d9d88b39d"}, - {file = "pillow-11.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0c3e6d0f59171dfa2e25d7116217543310908dfa2770aa64b8f87605f8cacc97"}, - {file = 
"pillow-11.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc1c3bc53befb6096b84165956e886b1729634a799e9d6329a0c512ab651e579"}, - {file = "pillow-11.2.1-cp310-cp310-win32.whl", hash = "sha256:312c77b7f07ab2139924d2639860e084ec2a13e72af54d4f08ac843a5fc9c79d"}, - {file = "pillow-11.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:9bc7ae48b8057a611e5fe9f853baa88093b9a76303937449397899385da06fad"}, - {file = "pillow-11.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:2728567e249cdd939f6cc3d1f049595c66e4187f3c34078cbc0a7d21c47482d2"}, - {file = "pillow-11.2.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35ca289f712ccfc699508c4658a1d14652e8033e9b69839edf83cbdd0ba39e70"}, - {file = "pillow-11.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0409af9f829f87a2dfb7e259f78f317a5351f2045158be321fd135973fff7bf"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4e5c5edee874dce4f653dbe59db7c73a600119fbea8d31f53423586ee2aafd7"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b93a07e76d13bff9444f1a029e0af2964e654bfc2e2c2d46bfd080df5ad5f3d8"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:e6def7eed9e7fa90fde255afaf08060dc4b343bbe524a8f69bdd2a2f0018f600"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:8f4f3724c068be008c08257207210c138d5f3731af6c155a81c2b09a9eb3a788"}, - {file = "pillow-11.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a0a6709b47019dff32e678bc12c63008311b82b9327613f534e496dacaefb71e"}, - {file = "pillow-11.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f6b0c664ccb879109ee3ca702a9272d877f4fcd21e5eb63c26422fd6e415365e"}, - {file = "pillow-11.2.1-cp311-cp311-win32.whl", hash = "sha256:cc5d875d56e49f112b6def6813c4e3d3036d269c008bf8aef72cd08d20ca6df6"}, - {file = "pillow-11.2.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:0f5c7eda47bf8e3c8a283762cab94e496ba977a420868cb819159980b6709193"}, - {file = "pillow-11.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:4d375eb838755f2528ac8cbc926c3e31cc49ca4ad0cf79cff48b20e30634a4a7"}, - {file = "pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f"}, - {file = "pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4"}, - {file = "pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443"}, - {file = "pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c"}, - {file = "pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3"}, - {file = "pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941"}, - {file = "pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb"}, - {file = "pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28"}, - {file = 
"pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155"}, - {file = "pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14"}, - {file = "pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b"}, - {file = "pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2"}, - {file = "pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691"}, - {file = "pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c"}, - {file = "pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22"}, - {file = "pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16"}, - {file = 
"pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91"}, - {file = "pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751"}, - {file = "pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9"}, - {file = "pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd"}, - {file = "pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e"}, - {file = "pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681"}, - {file = "pillow-11.2.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:7491cf8a79b8eb867d419648fff2f83cb0b3891c8b36da92cc7f1931d46108c8"}, - {file = "pillow-11.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b02d8f9cb83c52578a0b4beadba92e37d83a4ef11570a8688bbf43f4ca50909"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:014ca0050c85003620526b0ac1ac53f56fc93af128f7546623cc8e31875ab928"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3692b68c87096ac6308296d96354eddd25f98740c9d2ab54e1549d6c8aea9d79"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:f781dcb0bc9929adc77bad571b8621ecb1e4cdef86e940fe2e5b5ee24fd33b35"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash 
= "sha256:2b490402c96f907a166615e9a5afacf2519e28295f157ec3a2bb9bd57de638cb"}, - {file = "pillow-11.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dd6b20b93b3ccc9c1b597999209e4bc5cf2853f9ee66e3fc9a400a78733ffc9a"}, - {file = "pillow-11.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4b835d89c08a6c2ee7781b8dd0a30209a8012b5f09c0a665b65b0eb3560b6f36"}, - {file = "pillow-11.2.1-cp39-cp39-win32.whl", hash = "sha256:b10428b3416d4f9c61f94b494681280be7686bda15898a3a9e08eb66a6d92d67"}, - {file = "pillow-11.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:6ebce70c3f486acf7591a3d73431fa504a4e18a9b97ff27f5f47b7368e4b9dd1"}, - {file = "pillow-11.2.1-cp39-cp39-win_arm64.whl", hash = "sha256:c27476257b2fdcd7872d54cfd119b3a9ce4610fb85c8e32b70b42e3680a29a1e"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9b7b0d4fd2635f54ad82785d56bc0d94f147096493a79985d0ab57aedd563156"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:aa442755e31c64037aa7c1cb186e0b369f8416c567381852c63444dd666fb772"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0d3348c95b766f54b76116d53d4cb171b52992a1027e7ca50c81b43b9d9e363"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85d27ea4c889342f7e35f6d56e7e1cb345632ad592e8c51b693d7b7556043ce0"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bf2c33d6791c598142f00c9c4c7d47f6476731c31081331664eb26d6ab583e01"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e616e7154c37669fc1dfc14584f11e284e05d1c650e1c0f972f281c4ccc53193"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:39ad2e0f424394e3aebc40168845fee52df1394a4673a6ee512d840d14ab3013"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = 
"sha256:80f1df8dbe9572b4b7abdfa17eb5d78dd620b1d55d9e25f834efdbee872d3aed"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ea926cfbc3957090becbcbbb65ad177161a2ff2ad578b5a6ec9bb1e1cd78753c"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:738db0e0941ca0376804d4de6a782c005245264edaa253ffce24e5a15cbdc7bd"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db98ab6565c69082ec9b0d4e40dd9f6181dab0dd236d26f7a50b8b9bfbd5076"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:036e53f4170e270ddb8797d4c590e6dd14d28e15c7da375c18978045f7e6c37b"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:14f73f7c291279bd65fda51ee87affd7c1e097709f7fdd0188957a16c264601f"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:208653868d5c9ecc2b327f9b9ef34e0e42a4cdd172c2988fd81d62d2bc9bc044"}, - {file = "pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6"}, +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, + {file = 
"pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, + {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, + {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, + {file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, + {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, + {file = "pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, + {file = "pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, + {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, + {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, + {file = 
"pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, + {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, + {file = "pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, + {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, + {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, + {file = "pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = 
"sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, + {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, + {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, + {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, + {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, ] [package.extras] -docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] fpx = ["olefile"] mic = ["olefile"] -test-arrow = ["pyarrow"] -tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] -typing = ["typing-extensions"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions ; 
python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -2025,6 +1871,7 @@ version = "4.2.2" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, @@ -2041,6 +1888,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2056,6 +1904,7 @@ version = "3.1" description = "Pretty print trees" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "pptree-3.1.tar.gz", hash = "sha256:4dd0ba2f58000cbd29d68a5b64bac29bcb5a663642f79404877c0059668a69f6"}, ] @@ -2066,6 +1915,7 @@ version = "3.7.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "pre_commit-3.7.1-py2.py3-none-any.whl", hash = "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5"}, {file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"}, @@ -2080,54 +1930,65 @@ virtualenv = ">=20.10.0" [[package]] name = "protobuf" -version = "6.30.2" +version = "5.27.2" description = "" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103"}, - {file = "protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9"}, - {file = "protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b"}, - {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815"}, - {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d"}, - {file = "protobuf-6.30.2-cp39-cp39-win32.whl", hash = "sha256:524afedc03b31b15586ca7f64d877a98b184f007180ce25183d1a5cb230ee72b"}, - {file = "protobuf-6.30.2-cp39-cp39-win_amd64.whl", hash = "sha256:acec579c39c88bd8fbbacab1b8052c793efe83a0a5bd99db4a31423a25c0a0e2"}, - {file = "protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51"}, - {file = "protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048"}, + {file = "protobuf-5.27.2-cp310-abi3-win32.whl", hash = "sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38"}, + {file = "protobuf-5.27.2-cp310-abi3-win_amd64.whl", hash = 
"sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505"}, + {file = "protobuf-5.27.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5"}, + {file = "protobuf-5.27.2-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b"}, + {file = "protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e"}, + {file = "protobuf-5.27.2-cp38-cp38-win32.whl", hash = "sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863"}, + {file = "protobuf-5.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6"}, + {file = "protobuf-5.27.2-cp39-cp39-win32.whl", hash = "sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca"}, + {file = "protobuf-5.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce"}, + {file = "protobuf-5.27.2-py3-none-any.whl", hash = "sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470"}, + {file = "protobuf-5.27.2.tar.gz", hash = "sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714"}, ] [[package]] name = "psutil" -version = "7.0.0" -description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." +version = "6.0.0" +description = "Cross-platform lib for process and system monitoring in Python." 
optional = false -python-versions = ">=3.6" -files = [ - {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, - {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, - {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, - {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, - {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, - {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, - {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["dev"] +files = [ + {file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"}, + {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"}, + {file = 
"psutil-6.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c"}, + {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3"}, + {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c"}, + {file = "psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35"}, + {file = "psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1"}, + {file = "psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132"}, + {file = "psutil-6.0.0-cp36-cp36m-win32.whl", hash = "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14"}, + {file = "psutil-6.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c"}, + {file = "psutil-6.0.0-cp37-abi3-win32.whl", hash = "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d"}, + {file = "psutil-6.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3"}, + {file = "psutil-6.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0"}, + {file = "psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2"}, ] [package.extras] -dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] -test = ["pytest", "pytest-xdist", "setuptools"] +test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] [[package]] name = "pyparsing" -version = "3.2.3" +version = "3.1.2" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false -python-versions = ">=3.9" +python-versions = ">=3.6.8" +groups = ["dev"] files = [ - {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"}, - {file = "pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"}, + {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, + {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, ] [package.extras] @@ -2139,6 +2000,7 @@ version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev"] files = [ {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, @@ -2151,6 +2013,7 @@ version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, @@ -2173,6 +2036,7 @@ version = "4.1.0" description = "Pytest plugin for measuring coverage." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, @@ -2191,6 +2055,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2199,43 +2064,13 @@ files = [ [package.dependencies] six = ">=1.5" -[[package]] -name = "pytorch-lightning" -version = "2.2.5" -description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "pytorch-lightning-2.2.5.tar.gz", hash = "sha256:8d06d0166e2204f82864f5d2b53a367c2c375d9cd5a7f6174434b2dffeaef7e9"}, - {file = "pytorch_lightning-2.2.5-py3-none-any.whl", hash = "sha256:67a7800863326914f68f6afd68f427855ef2315b4f00d554be8ea4c0f0557fd8"}, -] - -[package.dependencies] -fsspec = {version = ">=2022.5.0", extras = ["http"]} -lightning-utilities = ">=0.8.0" -numpy = ">=1.17.2" -packaging = ">=20.0" -PyYAML = ">=5.4" -torch = ">=1.13.0" -torchmetrics = ">=0.7.0" -tqdm = ">=4.57.0" -typing-extensions = ">=4.4.0" - -[package.extras] -all = ["bitsandbytes (==0.41.0)", "deepspeed (>=0.8.2,<=0.9.3)", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.15.0)", "jsonargparse[signatures] (>=4.27.7)", "lightning-utilities (>=0.8.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "requests (<2.32.0)", "rich (>=12.3.0)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)"] -deepspeed = ["deepspeed (>=0.8.2,<=0.9.3)"] -dev = ["bitsandbytes (==0.41.0)", "cloudpickle (>=1.3)", "coverage (==7.3.1)", "deepspeed (>=0.8.2,<=0.9.3)", "fastapi", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.15.0)", "jsonargparse[signatures] (>=4.27.7)", "lightning-utilities (>=0.8.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "onnx (>=0.14.0)", "onnxruntime (>=0.15.0)", "pandas (>1.0)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "requests (<2.32.0)", "rich (>=12.3.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)", "uvicorn"] -examples = ["gym[classic-control] (>=0.17.0)", "ipython[all] (<8.15.0)", "lightning-utilities (>=0.8.0)", "requests (<2.32.0)", "torchmetrics (>=0.10.0)", "torchvision (>=0.14.0)"] -extra = ["bitsandbytes (==0.41.0)", "hydra-core 
(>=1.0.5)", "jsonargparse[signatures] (>=4.27.7)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "rich (>=12.3.0)", "tensorboardX (>=2.2)"] -strategies = ["deepspeed (>=0.8.2,<=0.9.3)"] -test = ["cloudpickle (>=1.3)", "coverage (==7.3.1)", "fastapi", "onnx (>=0.14.0)", "onnxruntime (>=0.15.0)", "pandas (>1.0)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "uvicorn"] - [[package]] name = "pytorch-revgrad" version = "0.2.0" description = "A pytorch module (and function) to reverse gradients." optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "pytorch_revgrad-0.2.0-py3-none-any.whl", hash = "sha256:2276fb189b2ce26f756a97effe2a6bcf8f7fdc60542c5dfb45c53f09ef123aa7"}, {file = "pytorch_revgrad-0.2.0.tar.gz", hash = "sha256:9cf097a7d18cbadddeaec9fef74b258d70b6cb8d0c77f524baab18bffc7d7be9"}, @@ -2251,13 +2086,14 @@ test = ["coveralls", "flake8", "pytest", "pytest-cov", "pytest-flake8"] [[package]] name = "pytz" -version = "2025.2" +version = "2024.1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main"] files = [ - {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, - {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] [[package]] @@ -2266,6 +2102,7 @@ version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" +groups = ["main", "dev"] files = [ {file = 
"PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, @@ -2322,105 +2159,91 @@ files = [ [[package]] name = "regex" -version = "2024.11.6" +version = "2024.5.15" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" -files = [ - {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, - {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, - {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, - {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, - {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, - {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, - {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, - {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, - {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, - {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, - {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, - {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, - {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, - {file = 
"regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, - {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, - {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, - {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, - {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, - {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, - {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, - {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, - {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, - {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, - {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, - {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, - {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, - {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, - {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, - {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, - {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, - {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, - {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, +groups = ["dev"] +files = [ + {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, + {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, + {file = "regex-2024.5.15-cp310-cp310-macosx_11_0_arm64.whl", hash 
= "sha256:a0bd000c6e266927cb7a1bc39d55be95c4b4f65c5be53e659537537e019232b1"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eaa7ddaf517aa095fa8da0b5015c44d03da83f5bd49c87961e3c997daed0de7"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba68168daedb2c0bab7fd7e00ced5ba90aebf91024dea3c88ad5063c2a562cca"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e8d717bca3a6e2064fc3a08df5cbe366369f4b052dcd21b7416e6d71620dca1"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1337b7dbef9b2f71121cdbf1e97e40de33ff114801263b275aafd75303bd62b5"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9ebd0a36102fcad2f03696e8af4ae682793a5d30b46c647eaf280d6cfb32796"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9efa1a32ad3a3ea112224897cdaeb6aa00381627f567179c0314f7b65d354c62"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1595f2d10dff3d805e054ebdc41c124753631b6a471b976963c7b28543cf13b0"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b802512f3e1f480f41ab5f2cfc0e2f761f08a1f41092d6718868082fc0d27143"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a0981022dccabca811e8171f913de05720590c915b033b7e601f35ce4ea7019f"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:19068a6a79cf99a19ccefa44610491e9ca02c2be3305c7760d3831d38a467a6f"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b5269484f6126eee5e687785e83c6b60aad7663dafe842b34691157e5083e53"}, + {file = "regex-2024.5.15-cp310-cp310-win32.whl", hash = 
"sha256:ada150c5adfa8fbcbf321c30c751dc67d2f12f15bd183ffe4ec7cde351d945b3"}, + {file = "regex-2024.5.15-cp310-cp310-win_amd64.whl", hash = "sha256:ac394ff680fc46b97487941f5e6ae49a9f30ea41c6c6804832063f14b2a5a145"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f5b1dff3ad008dccf18e652283f5e5339d70bf8ba7c98bf848ac33db10f7bc7a"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6a2b494a76983df8e3d3feea9b9ffdd558b247e60b92f877f93a1ff43d26656"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a32b96f15c8ab2e7d27655969a23895eb799de3665fa94349f3b2fbfd547236f"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10002e86e6068d9e1c91eae8295ef690f02f913c57db120b58fdd35a6bb1af35"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec54d5afa89c19c6dd8541a133be51ee1017a38b412b1321ccb8d6ddbeb4cf7d"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10e4ce0dca9ae7a66e6089bb29355d4432caed736acae36fef0fdd7879f0b0cb"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e507ff1e74373c4d3038195fdd2af30d297b4f0950eeda6f515ae3d84a1770f"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f059a4d795e646e1c37665b9d06062c62d0e8cc3c511fe01315973a6542e40"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0721931ad5fe0dda45d07f9820b90b2148ccdd8e45bb9e9b42a146cb4f695649"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:833616ddc75ad595dee848ad984d067f2f31be645d603e4d158bba656bbf516c"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:287eb7f54fc81546346207c533ad3c2c51a8d61075127d7f6d79aaf96cdee890"}, + 
{file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:19dfb1c504781a136a80ecd1fff9f16dddf5bb43cec6871778c8a907a085bb3d"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:119af6e56dce35e8dfb5222573b50c89e5508d94d55713c75126b753f834de68"}, + {file = "regex-2024.5.15-cp311-cp311-win32.whl", hash = "sha256:1c1c174d6ec38d6c8a7504087358ce9213d4332f6293a94fbf5249992ba54efa"}, + {file = "regex-2024.5.15-cp311-cp311-win_amd64.whl", hash = "sha256:9e717956dcfd656f5055cc70996ee2cc82ac5149517fc8e1b60261b907740201"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:632b01153e5248c134007209b5c6348a544ce96c46005d8456de1d552455b014"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e64198f6b856d48192bf921421fdd8ad8eb35e179086e99e99f711957ffedd6e"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68811ab14087b2f6e0fc0c2bae9ad689ea3584cad6917fc57be6a48bbd012c49"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ec0c2fea1e886a19c3bee0cd19d862b3aa75dcdfb42ebe8ed30708df64687a"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0c0c0003c10f54a591d220997dd27d953cd9ccc1a7294b40a4be5312be8797b"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2431b9e263af1953c55abbd3e2efca67ca80a3de8a0437cb58e2421f8184717a"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a605586358893b483976cffc1723fb0f83e526e8f14c6e6614e75919d9862cf"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391d7f7f1e409d192dba8bcd42d3e4cf9e598f3979cdaed6ab11288da88cb9f2"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:9ff11639a8d98969c863d4617595eb5425fd12f7c5ef6621a4b74b71ed8726d5"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4eee78a04e6c67e8391edd4dad3279828dd66ac4b79570ec998e2155d2e59fd5"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8fe45aa3f4aa57faabbc9cb46a93363edd6197cbc43523daea044e9ff2fea83e"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d0a3d8d6acf0c78a1fff0e210d224b821081330b8524e3e2bc5a68ef6ab5803d"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c486b4106066d502495b3025a0a7251bf37ea9540433940a23419461ab9f2a80"}, + {file = "regex-2024.5.15-cp312-cp312-win32.whl", hash = "sha256:c49e15eac7c149f3670b3e27f1f28a2c1ddeccd3a2812cba953e01be2ab9b5fe"}, + {file = "regex-2024.5.15-cp312-cp312-win_amd64.whl", hash = "sha256:673b5a6da4557b975c6c90198588181029c60793835ce02f497ea817ff647cb2"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:87e2a9c29e672fc65523fb47a90d429b70ef72b901b4e4b1bd42387caf0d6835"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c3bea0ba8b73b71b37ac833a7f3fd53825924165da6a924aec78c13032f20850"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfc4f82cabe54f1e7f206fd3d30fda143f84a63fe7d64a81558d6e5f2e5aaba9"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5bb9425fe881d578aeca0b2b4b3d314ec88738706f66f219c194d67179337cb"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64c65783e96e563103d641760664125e91bd85d8e49566ee560ded4da0d3e704"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf2430df4148b08fb4324b848672514b1385ae3807651f3567871f130a728cc3"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:5397de3219a8b08ae9540c48f602996aa6b0b65d5a61683e233af8605c42b0f2"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:455705d34b4154a80ead722f4f185b04c4237e8e8e33f265cd0798d0e44825fa"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2b6f1b3bb6f640c1a92be3bbfbcb18657b125b99ecf141fb3310b5282c7d4ed"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3ad070b823ca5890cab606c940522d05d3d22395d432f4aaaf9d5b1653e47ced"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5b5467acbfc153847d5adb21e21e29847bcb5870e65c94c9206d20eb4e99a384"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e6662686aeb633ad65be2a42b4cb00178b3fbf7b91878f9446075c404ada552f"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2b4c884767504c0e2401babe8b5b7aea9148680d2e157fa28f01529d1f7fcf67"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3cd7874d57f13bf70078f1ff02b8b0aa48d5b9ed25fc48547516c6aba36f5741"}, + {file = "regex-2024.5.15-cp38-cp38-win32.whl", hash = "sha256:e4682f5ba31f475d58884045c1a97a860a007d44938c4c0895f41d64481edbc9"}, + {file = "regex-2024.5.15-cp38-cp38-win_amd64.whl", hash = "sha256:d99ceffa25ac45d150e30bd9ed14ec6039f2aad0ffa6bb87a5936f5782fc1569"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:13cdaf31bed30a1e1c2453ef6015aa0983e1366fad2667657dbcac7b02f67133"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cac27dcaa821ca271855a32188aa61d12decb6fe45ffe3e722401fe61e323cd1"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7dbe2467273b875ea2de38ded4eba86cbcbc9a1a6d0aa11dcf7bd2e67859c435"}, + {file = 
"regex-2024.5.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f18a9a3513a99c4bef0e3efd4c4a5b11228b48aa80743be822b71e132ae4f5"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d347a741ea871c2e278fde6c48f85136c96b8659b632fb57a7d1ce1872547600"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1878b8301ed011704aea4c806a3cadbd76f84dece1ec09cc9e4dc934cfa5d4da"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4babf07ad476aaf7830d77000874d7611704a7fcf68c9c2ad151f5d94ae4bfc4"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35cb514e137cb3488bce23352af3e12fb0dbedd1ee6e60da053c69fb1b29cc6c"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdd09d47c0b2efee9378679f8510ee6955d329424c659ab3c5e3a6edea696294"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:72d7a99cd6b8f958e85fc6ca5b37c4303294954eac1376535b03c2a43eb72629"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a094801d379ab20c2135529948cb84d417a2169b9bdceda2a36f5f10977ebc16"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c0c18345010870e58238790a6779a1219b4d97bd2e77e1140e8ee5d14df071aa"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:16093f563098448ff6b1fa68170e4acbef94e6b6a4e25e10eae8598bb1694b5d"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e38a7d4e8f633a33b4c7350fbd8bad3b70bf81439ac67ac38916c4a86b465456"}, + {file = "regex-2024.5.15-cp39-cp39-win32.whl", hash = "sha256:71a455a3c584a88f654b64feccc1e25876066c4f5ef26cd6dd711308aa538694"}, + {file = "regex-2024.5.15-cp39-cp39-win_amd64.whl", hash 
= "sha256:cab12877a9bdafde5500206d1020a584355a97884dfd388af3699e9137bf7388"}, + {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, ] [[package]] @@ -2429,6 +2252,7 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2447,43 +2271,130 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "s3transfer" -version = "0.12.0" +version = "0.10.2" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:35b314d7d82865756edab59f7baebc6b477189e6ab4c53050e28c1de4d9cce18"}, - {file = "s3transfer-0.12.0.tar.gz", hash = "sha256:8ac58bc1989a3fdb7c7f3ee0918a66b160d038a147c7b5db1500930a607e9a1c"}, + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, ] [package.dependencies] -botocore = ">=1.37.4,<2.0a.0" +botocore = ">=1.33.2,<2.0a.0" [package.extras] -crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "safetensors" -version = "0.5.3" +version = "0.4.3" description = "" optional = false python-versions = ">=3.7" -files = [ - {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, - {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, - {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11bce6164887cd491ca75c2326a113ba934be596e22b28b1742ce27b1d076467"}, - {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4a243be3590bc3301c821da7a18d87224ef35cbd3e5f5727e4e0728b8172411e"}, - {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8bd84b12b1670a6f8e50f01e28156422a2bc07fb16fc4e98bded13039d688a0d"}, - {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:391ac8cab7c829452175f871fcaf414aa1e292b5448bd02620f675a7f3e7abb9"}, - {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cead1fa41fc54b1e61089fa57452e8834f798cb1dc7a09ba3524f1eb08e0317a"}, - {file = "safetensors-0.5.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1077f3e94182d72618357b04b5ced540ceb71c8a813d3319f1aba448e68a770d"}, - {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:799021e78287bac619c7b3f3606730a22da4cda27759ddf55d37c8db7511c74b"}, - {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:df26da01aaac504334644e1b7642fa000bfec820e7cef83aeac4e355e03195ff"}, - {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:32c3ef2d7af8b9f52ff685ed0bc43913cdcde135089ae322ee576de93eae5135"}, - {file = "safetensors-0.5.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:37f1521be045e56fc2b54c606d4455573e717b2d887c579ee1dbba5f868ece04"}, - {file = "safetensors-0.5.3-cp38-abi3-win32.whl", hash = "sha256:cfc0ec0846dcf6763b0ed3d1846ff36008c6e7290683b61616c4b040f6a54ace"}, - {file = "safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11"}, - {file = 
"safetensors-0.5.3.tar.gz", hash = "sha256:b6b0d6ecacec39a4fdd99cc19f4576f5219ce858e6fd8dbe7609df0b8dc56965"}, +groups = ["dev"] +files = [ + {file = "safetensors-0.4.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dcf5705cab159ce0130cd56057f5f3425023c407e170bca60b4868048bae64fd"}, + {file = "safetensors-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bb4f8c5d0358a31e9a08daeebb68f5e161cdd4018855426d3f0c23bb51087055"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a5319ef409e7f88686a46607cbc3c428271069d8b770076feaf913664a07ac"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb9c65bd82f9ef3ce4970dc19ee86be5f6f93d032159acf35e663c6bea02b237"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edb5698a7bc282089f64c96c477846950358a46ede85a1c040e0230344fdde10"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efcc860be094b8d19ac61b452ec635c7acb9afa77beb218b1d7784c6d41fe8ad"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d88b33980222085dd6001ae2cad87c6068e0991d4f5ccf44975d216db3b57376"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5fc6775529fb9f0ce2266edd3e5d3f10aab068e49f765e11f6f2a63b5367021d"}, + {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9c6ad011c1b4e3acff058d6b090f1da8e55a332fbf84695cf3100c649cc452d1"}, + {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c496c5401c1b9c46d41a7688e8ff5b0310a3b9bae31ce0f0ae870e1ea2b8caf"}, + {file = "safetensors-0.4.3-cp310-none-win32.whl", hash = "sha256:38e2a8666178224a51cca61d3cb4c88704f696eac8f72a49a598a93bbd8a4af9"}, + {file = "safetensors-0.4.3-cp310-none-win_amd64.whl", hash = 
"sha256:393e6e391467d1b2b829c77e47d726f3b9b93630e6a045b1d1fca67dc78bf632"}, + {file = "safetensors-0.4.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:22f3b5d65e440cec0de8edaa672efa888030802e11c09b3d6203bff60ebff05a"}, + {file = "safetensors-0.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c4fa560ebd4522adddb71dcd25d09bf211b5634003f015a4b815b7647d62ebe"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9afd5358719f1b2cf425fad638fc3c887997d6782da317096877e5b15b2ce93"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d8c5093206ef4b198600ae484230402af6713dab1bd5b8e231905d754022bec7"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0b2104df1579d6ba9052c0ae0e3137c9698b2d85b0645507e6fd1813b70931a"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8cf18888606dad030455d18f6c381720e57fc6a4170ee1966adb7ebc98d4d6a3"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0bf4f9d6323d9f86eef5567eabd88f070691cf031d4c0df27a40d3b4aaee755b"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:585c9ae13a205807b63bef8a37994f30c917ff800ab8a1ca9c9b5d73024f97ee"}, + {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faefeb3b81bdfb4e5a55b9bbdf3d8d8753f65506e1d67d03f5c851a6c87150e9"}, + {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:befdf0167ad626f22f6aac6163477fcefa342224a22f11fdd05abb3995c1783c"}, + {file = "safetensors-0.4.3-cp311-none-win32.whl", hash = "sha256:a7cef55929dcbef24af3eb40bedec35d82c3c2fa46338bb13ecf3c5720af8a61"}, + {file = "safetensors-0.4.3-cp311-none-win_amd64.whl", hash = "sha256:840b7ac0eff5633e1d053cc9db12fdf56b566e9403b4950b2dc85393d9b88d67"}, + {file = 
"safetensors-0.4.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:22d21760dc6ebae42e9c058d75aa9907d9f35e38f896e3c69ba0e7b213033856"}, + {file = "safetensors-0.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d22c1a10dff3f64d0d68abb8298a3fd88ccff79f408a3e15b3e7f637ef5c980"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1648568667f820b8c48317c7006221dc40aced1869908c187f493838a1362bc"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:446e9fe52c051aeab12aac63d1017e0f68a02a92a027b901c4f8e931b24e5397"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fef5d70683643618244a4f5221053567ca3e77c2531e42ad48ae05fae909f542"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a1f4430cc0c9d6afa01214a4b3919d0a029637df8e09675ceef1ca3f0dfa0df"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d603846a8585b9432a0fd415db1d4c57c0f860eb4aea21f92559ff9902bae4d"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a844cdb5d7cbc22f5f16c7e2a0271170750763c4db08381b7f696dbd2c78a361"}, + {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:88887f69f7a00cf02b954cdc3034ffb383b2303bc0ab481d4716e2da51ddc10e"}, + {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ee463219d9ec6c2be1d331ab13a8e0cd50d2f32240a81d498266d77d07b7e71e"}, + {file = "safetensors-0.4.3-cp312-none-win32.whl", hash = "sha256:d0dd4a1db09db2dba0f94d15addc7e7cd3a7b0d393aa4c7518c39ae7374623c3"}, + {file = "safetensors-0.4.3-cp312-none-win_amd64.whl", hash = "sha256:d14d30c25897b2bf19b6fb5ff7e26cc40006ad53fd4a88244fdf26517d852dd7"}, + {file = "safetensors-0.4.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = 
"sha256:d1456f814655b224d4bf6e7915c51ce74e389b413be791203092b7ff78c936dd"}, + {file = "safetensors-0.4.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:455d538aa1aae4a8b279344a08136d3f16334247907b18a5c3c7fa88ef0d3c46"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf476bca34e1340ee3294ef13e2c625833f83d096cfdf69a5342475602004f95"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02ef3a24face643456020536591fbd3c717c5abaa2737ec428ccbbc86dffa7a4"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7de32d0d34b6623bb56ca278f90db081f85fb9c5d327e3c18fd23ac64f465768"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a0deb16a1d3ea90c244ceb42d2c6c276059616be21a19ac7101aa97da448faf"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c59d51f182c729f47e841510b70b967b0752039f79f1de23bcdd86462a9b09ee"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f598b713cc1a4eb31d3b3203557ac308acf21c8f41104cdd74bf640c6e538e3"}, + {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5757e4688f20df083e233b47de43845d1adb7e17b6cf7da5f8444416fc53828d"}, + {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fe746d03ed8d193674a26105e4f0fe6c726f5bb602ffc695b409eaf02f04763d"}, + {file = "safetensors-0.4.3-cp37-none-win32.whl", hash = "sha256:0d5ffc6a80f715c30af253e0e288ad1cd97a3d0086c9c87995e5093ebc075e50"}, + {file = "safetensors-0.4.3-cp37-none-win_amd64.whl", hash = "sha256:a11c374eb63a9c16c5ed146457241182f310902bd2a9c18255781bb832b6748b"}, + {file = "safetensors-0.4.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1e31be7945f66be23f4ec1682bb47faa3df34cb89fc68527de6554d3c4258a4"}, + {file = 
"safetensors-0.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:03a4447c784917c9bf01d8f2ac5080bc15c41692202cd5f406afba16629e84d6"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d244bcafeb1bc06d47cfee71727e775bca88a8efda77a13e7306aae3813fa7e4"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53c4879b9c6bd7cd25d114ee0ef95420e2812e676314300624594940a8d6a91f"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74707624b81f1b7f2b93f5619d4a9f00934d5948005a03f2c1845ffbfff42212"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d52c958dc210265157573f81d34adf54e255bc2b59ded6218500c9b15a750eb"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f9568f380f513a60139971169c4a358b8731509cc19112369902eddb33faa4d"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d9cd8e1560dfc514b6d7859247dc6a86ad2f83151a62c577428d5102d872721"}, + {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:89f9f17b0dacb913ed87d57afbc8aad85ea42c1085bd5de2f20d83d13e9fc4b2"}, + {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1139eb436fd201c133d03c81209d39ac57e129f5e74e34bb9ab60f8d9b726270"}, + {file = "safetensors-0.4.3-cp38-none-win32.whl", hash = "sha256:d9c289f140a9ae4853fc2236a2ffc9a9f2d5eae0cb673167e0f1b8c18c0961ac"}, + {file = "safetensors-0.4.3-cp38-none-win_amd64.whl", hash = "sha256:622afd28968ef3e9786562d352659a37de4481a4070f4ebac883f98c5836563e"}, + {file = "safetensors-0.4.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8651c7299cbd8b4161a36cd6a322fa07d39cd23535b144d02f1c1972d0c62f3c"}, + {file = "safetensors-0.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:e375d975159ac534c7161269de24ddcd490df2157b55c1a6eeace6cbb56903f0"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:084fc436e317f83f7071fc6a62ca1c513b2103db325cd09952914b50f51cf78f"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:41a727a7f5e6ad9f1db6951adee21bbdadc632363d79dc434876369a17de6ad6"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7dbbde64b6c534548696808a0e01276d28ea5773bc9a2dfb97a88cd3dffe3df"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bbae3b4b9d997971431c346edbfe6e41e98424a097860ee872721e176040a893"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01e4b22e3284cd866edeabe4f4d896229495da457229408d2e1e4810c5187121"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dd37306546b58d3043eb044c8103a02792cc024b51d1dd16bd3dd1f334cb3ed"}, + {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8815b5e1dac85fc534a97fd339e12404db557878c090f90442247e87c8aeaea"}, + {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e011cc162503c19f4b1fd63dfcddf73739c7a243a17dac09b78e57a00983ab35"}, + {file = "safetensors-0.4.3-cp39-none-win32.whl", hash = "sha256:01feb3089e5932d7e662eda77c3ecc389f97c0883c4a12b5cfdc32b589a811c3"}, + {file = "safetensors-0.4.3-cp39-none-win_amd64.whl", hash = "sha256:3f9cdca09052f585e62328c1c2923c70f46814715c795be65f0b93f57ec98a02"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1b89381517891a7bb7d1405d828b2bf5d75528299f8231e9346b8eba092227f9"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cd6fff9e56df398abc5866b19a32124815b656613c1c5ec0f9350906fd798aac"}, + {file = 
"safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:840caf38d86aa7014fe37ade5d0d84e23dcfbc798b8078015831996ecbc206a3"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9650713b2cfa9537a2baf7dd9fee458b24a0aaaa6cafcea8bdd5fb2b8efdc34"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4119532cd10dba04b423e0f86aecb96cfa5a602238c0aa012f70c3a40c44b50"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e066e8861eef6387b7c772344d1fe1f9a72800e04ee9a54239d460c400c72aab"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:90964917f5b0fa0fa07e9a051fbef100250c04d150b7026ccbf87a34a54012e0"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c41e1893d1206aa7054029681778d9a58b3529d4c807002c156d58426c225173"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae7613a119a71a497d012ccc83775c308b9c1dab454806291427f84397d852fd"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9bac020faba7f5dc481e881b14b6425265feabb5bfc552551d21189c0eddc3"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:420a98f593ff9930f5822560d14c395ccbc57342ddff3b463bc0b3d6b1951550"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f5e6883af9a68c0028f70a4c19d5a6ab6238a379be36ad300a22318316c00cb0"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:cdd0a3b5da66e7f377474599814dbf5cbf135ff059cc73694de129b58a5e8a2c"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9bfb92f82574d9e58401d79c70c716985dc049b635fef6eecbb024c79b2c46ad"}, + {file 
= "safetensors-0.4.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:3615a96dd2dcc30eb66d82bc76cda2565f4f7bfa89fcb0e31ba3cea8a1a9ecbb"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868ad1b6fc41209ab6bd12f63923e8baeb1a086814cb2e81a65ed3d497e0cf8f"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7ffba80aa49bd09195145a7fd233a7781173b422eeb995096f2b30591639517"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0acbe31340ab150423347e5b9cc595867d814244ac14218932a5cf1dd38eb39"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:19bbdf95de2cf64f25cd614c5236c8b06eb2cfa47cbf64311f4b5d80224623a3"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b852e47eb08475c2c1bd8131207b405793bfc20d6f45aff893d3baaad449ed14"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5d07cbca5b99babb692d76d8151bec46f461f8ad8daafbfd96b2fca40cadae65"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ab6527a20586d94291c96e00a668fa03f86189b8a9defa2cdd34a1a01acc7d5"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02318f01e332cc23ffb4f6716e05a492c5f18b1d13e343c49265149396284a44"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec4b52ce9a396260eb9731eb6aea41a7320de22ed73a1042c2230af0212758ce"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:018b691383026a2436a22b648873ed11444a364324e7088b99cd2503dd828400"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:309b10dbcab63269ecbf0e2ca10ce59223bb756ca5d431ce9c9eeabd446569da"}, + {file = 
"safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b277482120df46e27a58082df06a15aebda4481e30a1c21eefd0921ae7e03f65"}, + {file = "safetensors-0.4.3.tar.gz", hash = "sha256:2f85fc50c4e07a21e95c24e07460fe6f7e2859d0ce88092838352b798ce711c2"}, ] [package.extras] @@ -2493,7 +2404,7 @@ jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[num mlx = ["mlx (>=0.0.9)"] numpy = ["numpy (>=1.21.6)"] paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] -pinned-tf = ["safetensors[numpy]", "tensorflow (==2.18.0)"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] @@ -2501,41 +2412,33 @@ torch = ["safetensors[numpy]", "torch (>=1.10)"] [[package]] name = "scikit-learn" -version = "1.6.1" +version = "1.5.1" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" -files = [ - {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, - {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, - {file = "scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8634c4bd21a2a813e0a7e3900464e6d593162a29dd35d25bdf0103b3fce60ed5"}, - {file = "scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775da975a471c4f6f467725dff0ced5c7ac7bda5e9316b260225b48475279a1b"}, - {file = "scikit_learn-1.6.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:8a600c31592bd7dab31e1c61b9bbd6dea1b3433e67d264d17ce1017dbdce8002"}, - {file = "scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33"}, - {file = "scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d"}, - {file = "scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2"}, - {file = "scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8"}, - {file = "scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415"}, - {file = "scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b"}, - {file = "scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2"}, - {file = "scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f"}, - {file = "scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86"}, - {file = "scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52"}, - {file = "scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322"}, - {file = "scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1"}, - {file = 
"scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348"}, - {file = "scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97"}, - {file = "scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb"}, - {file = "scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236"}, - {file = "scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35"}, - {file = "scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691"}, - {file = "scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f"}, - {file = "scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6849dd3234e87f55dce1db34c89a810b489ead832aaf4d4550b7ea85628be6c1"}, - {file = "scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e"}, - {file = "scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44a17798172df1d3c1065e8fcf9019183f06c87609b49a124ebdf57ae6cb0107"}, - {file = "scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b7a3b86e411e4bce21186e1c180d792f3d99223dcfa3b4f597ecc92fa1a422"}, - {file = "scikit_learn-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7a73d457070e3318e32bdb3aa79a8d990474f19035464dfd8bede2883ab5dc3b"}, - {file = "scikit_learn-1.6.1.tar.gz", hash = 
"sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e"}, +groups = ["dev"] +files = [ + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:781586c414f8cc58e71da4f3d7af311e0505a683e112f2f62919e3019abd3745"}, + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5b213bc29cc30a89a3130393b0e39c847a15d769d6e59539cd86b75d276b1a7"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff4ba34c2abff5ec59c803ed1d97d61b036f659a17f55be102679e88f926fac"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:161808750c267b77b4a9603cf9c93579c7a74ba8486b1336034c2f1579546d21"}, + {file = "scikit_learn-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:10e49170691514a94bb2e03787aa921b82dbc507a4ea1f20fd95557862c98dc1"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:154297ee43c0b83af12464adeab378dee2d0a700ccd03979e2b821e7dd7cc1c2"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b5e865e9bd59396220de49cb4a57b17016256637c61b4c5cc81aaf16bc123bbe"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909144d50f367a513cee6090873ae582dba019cb3fca063b38054fa42704c3a4"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689b6f74b2c880276e365fe84fe4f1befd6a774f016339c65655eaff12e10cbf"}, + {file = "scikit_learn-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:9a07f90846313a7639af6a019d849ff72baadfa4c74c778821ae0fad07b7275b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5944ce1faada31c55fb2ba20a5346b88e36811aab504ccafb9f0339e9f780395"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0828673c5b520e879f2af6a9e99eee0eefea69a2188be1ca68a6121b809055c1"}, + {file = 
"scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508907e5f81390e16d754e8815f7497e52139162fd69c4fdbd2dfa5d6cc88915"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97625f217c5c0c5d0505fa2af28ae424bd37949bb2f16ace3ff5f2f81fb4498b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:da3f404e9e284d2b0a157e1b56b6566a34eb2798205cba35a211df3296ab7a74"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:88e0672c7ac21eb149d409c74cc29f1d611d5158175846e7a9c2427bd12b3956"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7b073a27797a283187a4ef4ee149959defc350b46cbf63a84d8514fe16b69855"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b59e3e62d2be870e5c74af4e793293753565c7383ae82943b83383fdcf5cc5c1"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd8d3a19d4bd6dc5a7d4f358c8c3a60934dc058f363c34c0ac1e9e12a31421d"}, + {file = "scikit_learn-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:5f57428de0c900a98389c4a433d4a3cf89de979b3aa24d1c1d251802aa15e44d"}, + {file = "scikit_learn-1.5.1.tar.gz", hash = "sha256:0ea5d40c0e3951df445721927448755d3fe1d80833b0b7308ebff5d2a45e6414"}, ] [package.dependencies] @@ -2547,11 +2450,11 @@ threadpoolctl = ">=3.1.0" [package.extras] benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] -docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design 
(>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.17.1)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)", "towncrier (>=24.8.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] maintenance = ["conda-lock (==2.5.6)"] -tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.5.1)", "scikit-image (>=0.17.2)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] [[package]] name = "scipy" @@ -2559,6 +2462,7 @@ version = "1.13.1" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash 
= "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, @@ -2601,6 +2505,7 @@ version = "1.5.11" description = "sentence segmentation and word tokenization tools" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "segtok-1.5.11-py3-none-any.whl", hash = "sha256:910616b76198c3141b2772df530270d3b706e42ae69a5b30ef115c7bd5d1501a"}, {file = "segtok-1.5.11.tar.gz", hash = "sha256:8ab2dd44245bcbfec25b575dc4618473bbdf2af8c2649698cd5a370f42f3db23"}, @@ -2611,13 +2516,14 @@ regex = "*" [[package]] name = "semver" -version = "3.0.4" +version = "3.0.2" description = "Python helper for Semantic Versioning (https://semver.org)" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ - {file = "semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746"}, - {file = "semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602"}, + {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, + {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, ] [[package]] @@ -2626,6 +2532,7 @@ version = "0.2.0" description = "SentencePiece python wrapper" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"}, {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"}, @@ -2688,6 +2595,8 @@ version = "70.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\"" 
files = [ {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, @@ -2695,28 +2604,30 @@ files = [ [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov ; platform_python_implementation != \"PyPy\"", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "six" -version = "1.17.0" +version = "1.16.0" description = "Python 
2 and 3 compatibility utilities" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main", "dev"] files = [ - {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, - {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] [[package]] name = "smart-open" -version = "7.1.0" +version = "7.0.4" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" optional = false python-versions = "<4.0,>=3.7" +groups = ["dev"] files = [ - {file = "smart_open-7.1.0-py3-none-any.whl", hash = "sha256:4b8489bb6058196258bafe901730c7db0dcf4f083f316e97269c66f45502055b"}, - {file = "smart_open-7.1.0.tar.gz", hash = "sha256:a4f09f84f0f6d3637c6543aca7b5487438877a21360e7368ccf1f704789752ba"}, + {file = "smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47"}, + {file = "smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524"}, ] [package.dependencies] @@ -2729,19 +2640,20 @@ gcs = ["google-cloud-storage (>=2.6.0)"] http = ["requests"] s3 = ["boto3"] ssh = ["paramiko"] -test = ["awscli", "azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "numpy", "paramiko", "pyopenssl", "pytest", "pytest-benchmark", "pytest-rerunfailures", "requests", "responses", "zstandard"] +test = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "paramiko", "pytest", "pytest-rerunfailures", "requests", 
"responses", "zstandard"] webhdfs = ["requests"] zst = ["zstandard"] [[package]] name = "soupsieve" -version = "2.7" +version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, - {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, ] [[package]] @@ -2750,23 +2662,28 @@ version = "2.1.0" description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe." optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c"}, ] [[package]] name = "sympy" -version = "1.12.1" +version = "1.14.0" description = "Computer algebra system (CAS) in Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" +groups = ["main", "dev"] files = [ - {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"}, - {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"}, + {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, + {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, ] [package.dependencies] -mpmath = ">=1.1.0,<1.4.0" +mpmath = ">=1.1.0,<1.4" + +[package.extras] +dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] [[package]] name = "tabulate" @@ 
-2774,6 +2691,7 @@ version = "0.9.0" description = "Pretty-print tabular data" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -2782,28 +2700,16 @@ files = [ [package.extras] widechars = ["wcwidth"] -[[package]] -name = "tbb" -version = "2021.12.0" -description = "IntelĀ® oneAPI Threading Building Blocks (oneTBB)" -optional = false -python-versions = "*" -files = [ - {file = "tbb-2021.12.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:f2cc9a7f8ababaa506cbff796ce97c3bf91062ba521e15054394f773375d81d8"}, - {file = "tbb-2021.12.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:a925e9a7c77d3a46ae31c34b0bb7f801c4118e857d137b68f68a8e458fcf2bd7"}, - {file = "tbb-2021.12.0-py3-none-win32.whl", hash = "sha256:b1725b30c174048edc8be70bd43bb95473f396ce895d91151a474d0fa9f450a8"}, - {file = "tbb-2021.12.0-py3-none-win_amd64.whl", hash = "sha256:fc2772d850229f2f3df85f1109c4844c495a2db7433d38200959ee9265b34789"}, -] - [[package]] name = "threadpoolctl" -version = "3.6.0" +version = "3.5.0" description = "threadpoolctl" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] files = [ - {file = "threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb"}, - {file = "threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e"}, + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, ] [[package]] @@ -2812,6 +2718,7 @@ version = "0.15.2" description = "" optional = false 
python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "tokenizers-0.15.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:52f6130c9cbf70544287575a985bf44ae1bda2da7e8c24e97716080593638012"}, {file = "tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:054c1cc9c6d68f7ffa4e810b3d5131e0ba511b6e4be34157aa08ee54c2f8d9ee"}, @@ -2939,6 +2846,8 @@ version = "2.0.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -2946,69 +2855,65 @@ files = [ [[package]] name = "torch" -version = "2.3.0+cpu" +version = "2.7.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false -python-versions = ">=3.8.0" -files = [ - {file = "torch-2.3.0+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:e3c220702d82c7596924150e0499fbbffcf62a88a59adc860fa357cd8dc1c302"}, - {file = "torch-2.3.0+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:ab0c05525195b8fecdf2ea75968ed32ccd87dff16381b6e13249babb4a9596ff"}, - {file = "torch-2.3.0+cpu-cp311-cp311-linux_x86_64.whl", hash = "sha256:97a38b25ee0e3d020691e7846efbca62a3d8a57645c027dcb5ba0adfec36fe55"}, - {file = "torch-2.3.0+cpu-cp311-cp311-win_amd64.whl", hash = "sha256:a8ac195974be6f067245bae8156b8c06fb0a723b0eed8f2e244b5dd58c7e2a49"}, - {file = "torch-2.3.0+cpu-cp312-cp312-linux_x86_64.whl", hash = "sha256:a8982e52185771591dad577a124a7770f72f288f8ae5833317b1e329c0d2f07e"}, - {file = "torch-2.3.0+cpu-cp312-cp312-win_amd64.whl", hash = "sha256:483131a7997995d867313ee902743084e844e830ab2a0c5e079c61ec2da3cd17"}, - {file = "torch-2.3.0+cpu-cp38-cp38-linux_x86_64.whl", hash = "sha256:8c52484880d5fbe511cffc255dd34847ddeced3f94334c6bf7eb2b0445f10cb4"}, - 
{file = "torch-2.3.0+cpu-cp38-cp38-win_amd64.whl", hash = "sha256:28a11bcc0d709b397d675cff689707019b8cc122e6bf328b57b900f47c36f156"}, - {file = "torch-2.3.0+cpu-cp39-cp39-linux_x86_64.whl", hash = "sha256:1e86e225e472392440ace378ba3165b5e87648e8b5fbf16adc41c0df881c38b8"}, - {file = "torch-2.3.0+cpu-cp39-cp39-win_amd64.whl", hash = "sha256:5c2afdff80203eaabf4c223a294c2f465020b3360e8e87f76b52ace9c5801ebe"}, +python-versions = ">=3.9.0" +groups = ["main", "dev"] +files = [ + {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a103b5d782af5bd119b81dbcc7ffc6fa09904c423ff8db397a1e6ea8fd71508f"}, + {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:fe955951bdf32d182ee8ead6c3186ad54781492bf03d547d31771a01b3d6fb7d"}, + {file = "torch-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:885453d6fba67d9991132143bf7fa06b79b24352f4506fd4d10b309f53454162"}, + {file = "torch-2.7.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d72acfdb86cee2a32c0ce0101606f3758f0d8bb5f8f31e7920dc2809e963aa7c"}, + {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:236f501f2e383f1cb861337bdf057712182f910f10aeaf509065d54d339e49b2"}, + {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:06eea61f859436622e78dd0cdd51dbc8f8c6d76917a9cf0555a333f9eac31ec1"}, + {file = "torch-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:8273145a2e0a3c6f9fd2ac36762d6ee89c26d430e612b95a99885df083b04e52"}, + {file = "torch-2.7.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aea4fc1bf433d12843eb2c6b2204861f43d8364597697074c8d38ae2507f8730"}, + {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ea1e518df4c9de73af7e8a720770f3628e7f667280bce2be7a16292697e3fa"}, + {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c33360cfc2edd976c2633b3b66c769bdcbbf0e0b6550606d188431c81e7dd1fc"}, + {file = "torch-2.7.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:d8bf6e1856ddd1807e79dc57e54d3335f2b62e6f316ed13ed3ecfe1fc1df3d8b"}, + {file = "torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:787687087412c4bd68d315e39bc1223f08aae1d16a9e9771d95eabbb04ae98fb"}, + {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:03563603d931e70722dce0e11999d53aa80a375a3d78e6b39b9f6805ea0a8d28"}, + {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d632f5417b6980f61404a125b999ca6ebd0b8b4bbdbb5fbbba44374ab619a412"}, + {file = "torch-2.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:23660443e13995ee93e3d844786701ea4ca69f337027b05182f5ba053ce43b38"}, + {file = "torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0da4f4dba9f65d0d203794e619fe7ca3247a55ffdcbd17ae8fb83c8b2dc9b585"}, + {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e08d7e6f21a617fe38eeb46dd2213ded43f27c072e9165dc27300c9ef9570934"}, + {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:30207f672328a42df4f2174b8f426f354b2baa0b7cca3a0adb3d6ab5daf00dc8"}, + {file = "torch-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:79042feca1c634aaf6603fe6feea8c6b30dfa140a6bbc0b973e2260c7e79a22e"}, + {file = "torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:988b0cbc4333618a1056d2ebad9eb10089637b659eb645434d0809d8d937b946"}, + {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:e0d81e9a12764b6f3879a866607c8ae93113cbcad57ce01ebde63eb48a576369"}, + {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8394833c44484547ed4a47162318337b88c97acdb3273d85ea06e03ffff44998"}, + {file = "torch-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:df41989d9300e6e3c19ec9f56f856187a6ef060c3662fe54f4b6baf1fc90bd19"}, + {file = "torch-2.7.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a737b5edd1c44a5c1ece2e9f3d00df9d1b3fb9541138bee56d83d38293fb6c9d"}, ] [package.dependencies] filelock = "*" fsspec = "*" jinja2 = "*" -mkl = 
{version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""} networkx = "*" -sympy = "*" -typing-extensions = ">=4.8.0" +nvidia-cublas-cu12 = {version = "12.6.4.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.6.80", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "9.5.1.17", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.3.0.4", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufile-cu12 = {version = "1.11.1.6", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.7.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.7.1.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.5.4.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparselt-cu12 = {version = "0.6.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.26.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvjitlink-cu12 = {version = "12.6.85", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = {version = "*", markers = "python_version >= \"3.12\""} +sympy = ">=1.13.3" +triton = {version = "3.3.1", markers = 
"platform_system == \"Linux\" and platform_machine == \"x86_64\""} +typing-extensions = ">=4.10.0" [package.extras] opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.9.1)"] - -[package.source] -type = "legacy" -url = "https://download.pytorch.org/whl/cpu" -reference = "pytorch" - -[[package]] -name = "torchmetrics" -version = "1.4.0.post0" -description = "PyTorch native Metrics" -optional = false -python-versions = ">=3.8" -files = [ - {file = "torchmetrics-1.4.0.post0-py3-none-any.whl", hash = "sha256:ab234216598e3fbd8d62ee4541a0e74e7e8fc935d099683af5b8da50f745b3c8"}, - {file = "torchmetrics-1.4.0.post0.tar.gz", hash = "sha256:ab9bcfe80e65dbabbddb6cecd9be21f1f1d5207bb74051ef95260740f2762358"}, -] - -[package.dependencies] -lightning-utilities = ">=0.8.0" -numpy = ">1.20.0" -packaging = ">17.1" -torch = ">=1.10.0" - -[package.extras] -all = ["SciencePlots (>=2.0.0)", "ipadic (>=1.0.0)", "matplotlib (>=3.3.0)", "mecab-python3 (>=1.0.6)", "mypy (==1.9.0)", "nltk (>=3.6)", "piq (<=0.8.0)", "pretty-errors (>=1.2.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "torch (==2.3.0)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -audio = ["pystoi (>=0.3.0)", "torchaudio (>=0.10.0)"] -debug = ["pretty-errors (>=1.2.0)"] -detection = ["pycocotools (>2.0.0)", "torchvision (>=0.8)"] -dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "dython (<=0.7.5)", "fairlearn", "fast-bss-eval (>=0.1.0)", "faster-coco-eval (>=1.3.3)", "huggingface-hub (<0.23)", "ipadic (>=1.0.0)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mir-eval (>=0.6)", "monai (==1.3.0)", "mypy (==1.9.0)", 
"netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<1.27.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "piq (<=0.8.0)", "pretty-errors (>=1.2.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytorch-msssim (==1.0.0)", "regex (>=2021.9.24)", "rouge-score (>0.1.0)", "sacrebleu (>=2.3.0)", "scikit-image (>=0.19.0)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch (==2.3.0)", "torch-complex (<=0.4.3)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -image = ["scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchvision (>=0.8)"] -multimodal = ["piq (<=0.8.0)", "transformers (>=4.10.0)"] -text = ["ipadic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "nltk (>=3.6)", "regex (>=2021.9.24)", "sentencepiece (>=0.2.0)", "tqdm (>=4.41.0)", "transformers (>4.4.0)"] -typing = ["mypy (==1.9.0)", "torch (==2.3.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -visual = ["SciencePlots (>=2.0.0)", "matplotlib (>=3.3.0)"] +optree = ["optree (>=0.13.0)"] [[package]] name = "tqdm" @@ -3016,6 +2921,7 @@ version = "4.66.4" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, @@ -3032,16 +2938,18 @@ telegram = ["requests"] [[package]] name = "transformer-smaller-training-vocab" -version = "0.4.1" +version = "0.4.0" description = "Temporary remove unused tokens during training to save ram and speed." 
optional = false -python-versions = "<4.0,>=3.9" +python-versions = "<4.0,>=3.8" +groups = ["dev"] files = [ - {file = "transformer_smaller_training_vocab-0.4.1-py3-none-any.whl", hash = "sha256:e19c4c95b594569710b1235f00ff69ddad6401a15554e59657f768dde885bb3a"}, - {file = "transformer_smaller_training_vocab-0.4.1.tar.gz", hash = "sha256:834a804a712ba23cbe410e390791db70d7812b0d7d3bfe1de3efa7b89a85a06c"}, + {file = "transformer_smaller_training_vocab-0.4.0-py3-none-any.whl", hash = "sha256:01cb3d8f4818121172e1591a06c3149bf49bc18d6f6f269eb42d2c4ed155cfcc"}, + {file = "transformer_smaller_training_vocab-0.4.0.tar.gz", hash = "sha256:d7360ac084786f66f99ef16d621f34acbb0dce6d9a624525d1f7dc8b6c3a49f7"}, ] [package.dependencies] +numpy = {version = ">=1.21.0,<2.0.0", markers = "python_version >= \"3.9\""} torch = ">=1.8.0,<2.0.1 || >2.0.1,<3.0.0" transformers = {version = ">=4.1,<5.0", extras = ["sentencepiece", "torch"]} @@ -3051,6 +2959,7 @@ version = "4.36.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" +groups = ["dev"] files = [ {file = "transformers-4.36.2-py3-none-any.whl", hash = "sha256:462066c4f74ee52516f12890dcc9ec71d1a5e97998db621668455117a54330f6"}, {file = "transformers-4.36.2.tar.gz", hash = "sha256:d8068e897e47793281501e547d2bbdfc5b8556409c2cb6c3d9e2ca77d4c0b4ec"}, @@ -3117,12 +3026,38 @@ torchhub = ["filelock", "huggingface-hub (>=0.19.3,<1.0)", "importlib-metadata", video = ["av (==9.2.0)", "decord (==0.6.0)"] vision = ["Pillow (>=10.0.1,<=15.0)"] +[[package]] +name = "triton" +version = "3.3.1" +description = "A language and compiler for custom Deep Learning operations" +optional = false +python-versions = "*" +groups = ["main", "dev"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "triton-3.3.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b74db445b1c562844d3cfad6e9679c72e93fdfb1a90a24052b03bb5c49d1242e"}, + {file = "triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b31e3aa26f8cb3cc5bf4e187bf737cbacf17311e1112b781d4a059353dfd731b"}, + {file = "triton-3.3.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9999e83aba21e1a78c1f36f21bce621b77bcaa530277a50484a7cb4a822f6e43"}, + {file = "triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240"}, + {file = "triton-3.3.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3198adb9d78b77818a5388bff89fa72ff36f9da0bc689db2f0a651a67ce6a42"}, + {file = "triton-3.3.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6139aeb04a146b0b8e0fbbd89ad1e65861c57cfed881f21d62d3cb94a36bab7"}, +] + +[package.dependencies] +setuptools = ">=40.8.0" + +[package.extras] +build = ["cmake (>=3.20)", "lit"] +tests = ["autopep8", "isort", "llnl-hatchet", "numpy", "pytest", "pytest-forked", "pytest-xdist", "scipy (>=1.7.1)"] +tutorials = ["matplotlib", "pandas", "tabulate"] + [[package]] name = "typing-extensions" version = "4.12.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, @@ -3130,13 +3065,14 @@ files = [ [[package]] name = "tzdata" -version = "2025.2" +version = "2024.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main"] files = [ - {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = 
"sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, - {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] [[package]] @@ -3145,14 +3081,15 @@ version = "1.26.19" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"}, {file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"}, ] [package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -3161,6 +3098,7 @@ version = "20.26.2" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = 
"virtualenv-20.26.2-py3-none-any.whl", hash = "sha256:a624db5e94f01ad993d476b9ee5346fdf7b9de43ccaee0e0197012dc838a0e9b"}, {file = "virtualenv-20.26.2.tar.gz", hash = "sha256:82bf0f4eebbb78d36ddaee0283d43fe5736b53880b8a8cdcd37390a07ac3741c"}, @@ -3173,7 +3111,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] [[package]] name = "wcwidth" @@ -3181,6 +3119,7 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3188,12 +3127,14 @@ files = [ [[package]] name = "wikipedia-api" -version = "0.8.1" +version = "0.6.0" description = "Python Wrapper for Wikipedia" optional = false python-versions = "*" +groups = ["dev"] files = [ - {file = "wikipedia_api-0.8.1.tar.gz", hash = 
"sha256:b31e93b3f5407c1a1ba413ed7326a05379a3c270df6cf6a211aca67a14c5658b"}, + {file = "Wikipedia-API-0.6.0.tar.gz", hash = "sha256:61e94921cca9ec68e92aa5f258261d6a88b7baa960f9acfcb0c9c2c525dcb3ff"}, + {file = "Wikipedia_API-0.6.0-py3-none-any.whl", hash = "sha256:6dfd6b3b680e342a3843fe954049c5784c1a67fadc0060f9d1696d1d0e41ecfb"}, ] [package.dependencies] @@ -3201,215 +3142,102 @@ requests = "*" [[package]] name = "wrapt" -version = "1.17.2" +version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." optional = false -python-versions = ">=3.8" -files = [ - {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, - {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, - {file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"}, - {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"}, - {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"}, - {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"}, - {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"}, - {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"}, - {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"}, - {file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"}, - {file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"}, - {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"}, - {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"}, - {file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"}, - {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"}, - {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"}, - {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"}, - {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"}, - {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"}, - {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"}, - {file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"}, - {file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"}, - {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"}, - {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"}, - {file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"}, - {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"}, - {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"}, - {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"}, - {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"}, - {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"}, - {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"}, - {file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"}, - {file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"}, - {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"}, - {file = 
"wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"}, - {file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"}, - {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"}, - {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"}, - {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"}, - {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"}, - {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"}, - {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"}, - {file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"}, - {file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"}, - {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"}, - {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"}, - {file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"}, - 
{file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"}, - {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"}, - {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"}, - {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"}, - {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"}, - {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"}, - {file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"}, - {file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"}, - {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"}, - {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"}, - {file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"}, - {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"}, - {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"}, - {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"}, - {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"}, - {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"}, - {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"}, - {file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"}, - {file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"}, - {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"}, - {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"}, - {file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"}, - {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"}, - {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"}, - {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"}, - {file = 
"wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"}, - {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"}, - {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"}, - {file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"}, - {file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"}, - {file = "wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"}, - {file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"}, -] - -[[package]] -name = "yarl" -version = "1.9.4" -description = "Yet another URL library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, - {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, - {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, - {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, - {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, - {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, - {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = 
"sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, - {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, - {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, - {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = 
"sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, - {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, - {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, - {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, - {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, - {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, - {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = 
"wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = 
"wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = 
"wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] -[package.dependencies] -idna = ">=2.0" -multidict = ">=4.0" - [[package]] name = 
"zipp" -version = "3.21.0" +version = "3.19.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version == \"3.9\"" files = [ - {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, - {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, + {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, + {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] -cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] -type = ["pytest-mypy"] +test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = "^3.9" -content-hash = "c95cdac43f10143aa8d2c695dc76c40400096001cc2c0f89f38c69fc0bdb67a0" +content-hash = "084124893c8d1054d2f99e0c3a8faa3346695641ecca8e97f2af90d38f842a84" diff --git a/pyproject.toml b/pyproject.toml index ceea5ca8b..1765ae0b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pie-modules" -version = "0.15.6" +version = "0.15.8" 
description = "Utility modules for Python-IE" authors = ["Arne Binder "] readme = "README.md" @@ -24,7 +24,7 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.9" -pie-core = ">=0.1.2,<0.2.0" +pie-core = ">=0.2.0,<0.3.0" pandas = ">=2.0.3,<3.0.0" [tool.poetry.group.dev.dependencies] From d6916e09f676b9c035adf9a359a70e4bf8b3011e Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sat, 19 Jul 2025 17:19:18 +0200 Subject: [PATCH 10/11] fix merge issue --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1765ae0b9..fdd3853e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pie-modules" -version = "0.15.8" +version = "0.15.9" description = "Utility modules for Python-IE" authors = ["Arne Binder "] readme = "README.md" From 6ddb6b4f055991f3111920bfbc3d34cdf8248058 Mon Sep 17 00:00:00 2001 From: Arne Binder Date: Sat, 19 Jul 2025 17:26:57 +0200 Subject: [PATCH 11/11] add comment to pandas dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index fdd3853e4..1b27be77c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.9" pie-core = ">=0.2.0,<0.3.0" +# for show_as_markdown in metrics pandas = ">=2.0.3,<3.0.0" [tool.poetry.group.dev.dependencies]