Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Build Artifacts
build/
doc/rebuild.sh

# Core Dumps
core
Expand Down
2 changes: 1 addition & 1 deletion doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ other content generation tasks.
:maxdepth: 1
:caption: API Reference

reference/assets
reference/checkpoint
reference/fairseq2.assets
reference/fairseq2.data
reference/fairseq2.data.tokenizers
reference/fairseq2.data.tokenizers.hub
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ fairseq2.assets
The assets module provides a model hub system for loading pre-trained models,
tokenizers, and other assets.

.. autoclass:: AssetStore
:members:
:undoc-members:
:show-inheritance:

.. autoclass:: StandardAssetStore
:members:
:undoc-members:
:show-inheritance:

.. autoclass:: AssetNotFoundError

**Coming soon:** This documentation is being developed. The assets module includes:

- Asset store and registry
Expand Down
52 changes: 52 additions & 0 deletions src/fairseq2/assets/card.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

T = TypeVar("T", bool, int, float, str)

"""
`AssetCard` objects identify and describe individual assets, including
datasets, models, and tokenizers. They contain the information needed
to load and/or pull a given asset.
"""


@final
class AssetCard:
Expand All @@ -32,6 +38,15 @@ def __init__(
self._base = base

def field(self, name: str) -> AssetCardField:
"""
Retrieves a field from an asset card

:param name: The field in the asset card to retrieve.

:raises AssetCardError: if the field does not exist.

:returns: the asset card field
"""
field = self.maybe_get_field(name)
if field is None:
msg = f"{self._name} asset card does not have a field named {name}."
Expand All @@ -41,6 +56,18 @@ def field(self, name: str) -> AssetCardField:
return field

def maybe_get_field(self, name: str) -> AssetCardField | None:
"""
Attempts to retrieve the field from an asset card

Returns ``None`` if the field cannot be found.

:param name: The field of the asset card to attempt to retrieve.

:raises KeyError: If the name field in the card metadata
cannot be found.

:returns: The designated asset field.
"""
card: AssetCard | None = self

while card is not None:
Expand All @@ -56,6 +83,13 @@ def maybe_get_field(self, name: str) -> AssetCardField | None:
return None

def has_field(self, name: str) -> bool:
"""
Returns whether a card contains a field

:param name: The field name to check.

:returns: Whether the field exists
"""
card: AssetCard | None = self

while card is not None:
Expand Down Expand Up @@ -128,16 +162,34 @@ def as_uri(self) -> Uri:


class AssetCardError(Exception):
    """Raised to report an error that concerns an asset card.

    For example, ``AssetCard.field`` documents raising this error when the
    requested field does not exist.

    :param name: Stored on the exception as the ``name`` attribute
        (presumably the name of the card involved -- confirm with callers).

    :param message: The error message handed to :class:`Exception`.
    """

    def __init__(self, name: str, message: str) -> None:
        self.name = name

        super().__init__(message)


class AssetConfigLoader(ABC):
    """Represents a loader for `AssetCard` configuration files."""

    @abstractmethod
    def load(self, card: AssetCard, base_config: object, config_key: str) -> object:
        """
        Loads a configuration object from an `AssetCard`.

        :param card: The card of the asset to load.

        :param base_config: The base configuration of the asset to be loaded.

        :param config_key: The specific configuration for the requested asset
            (presumably the name of the card field that holds it -- confirm
            against the implementations).

        :raises AssetCardError: If the requested `AssetCard` or config
            ``object`` cannot be parsed or merged with the base
            configuration, or if a directive cannot be processed.

        :returns: The loaded configuration object.
        """


@final
class StandardAssetConfigLoader(AssetConfigLoader):
Expand Down
11 changes: 11 additions & 0 deletions src/fairseq2/assets/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,17 @@
from fairseq2.logging import log
from fairseq2.utils.env import Environment, EnvironmentVariableError

"""
`AssetDirectoryAccessor` objects offer an abstraction for accessing the
directories where assets are stored.
"""


class AssetDirectoryAccessor(ABC):
"""
Represents a way to access system, user, or cache directories as a `Path`
on the local system
"""

@abstractmethod
def maybe_get_system_dir(self) -> Path | None: ...

Expand All @@ -31,6 +40,8 @@ def get_cache_dir(self) -> Path: ...

@final
class StandardAssetDirectoryAccessor(AssetDirectoryAccessor):
"""Represents a standard way to access an asset directory via a `FileSystem`"""

def __init__(self, env: Environment, file_system: FileSystem) -> None:
self._env = env
self._file_system = file_system
Expand Down
9 changes: 9 additions & 0 deletions src/fairseq2/assets/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
from fairseq2.utils.progress import ProgressReporter
from fairseq2.utils.uri import Uri

"""
An `AssetDownloadManager` object offers an abstraction for
downloading assets, such as models, tokenizers, and datasets, from
a remote repository.
"""


def get_asset_download_manager() -> AssetDownloadManager:
    """Return the `AssetDownloadManager` registered with the dependency resolver.

    :returns: The instance obtained by resolving `AssetDownloadManager`
        through the resolver returned by ``get_dependency_resolver()``.
    """
    resolver = get_dependency_resolver()

    return resolver.resolve(AssetDownloadManager)
Expand Down Expand Up @@ -119,6 +125,9 @@ def supported_schemes(self) -> Set[str]: ...


class AssetDownloadError(Exception):
"""Raised when an `Asset` is unable to be downloaded due to
non-existence or a network error"""

def __init__(self, asset_name: str, asset_kind: str, message: str) -> None:
super().__init__(message)

Expand Down
52 changes: 49 additions & 3 deletions src/fairseq2/assets/metadata_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@
from fairseq2.file_system import FileSystem
from fairseq2.utils.yaml import YamlError, YamlLoader

"""
Asset metadata is managed via `AssetMetadataProvider` objects. Metadata can
be loaded from a Python namespace package, or from user or cache directories.
"""

class AssetMetadataProvider(ABC):
"""Provides asset metadata."""

class AssetMetadataProvider(ABC):
@abstractmethod
def maybe_get_metadata(self, name: str) -> dict[str, object] | None:
"""
Expand All @@ -43,8 +46,15 @@ def asset_names(self) -> Set[str]:
@abstractmethod
def source(self) -> str: ...

"""Returns the source of the asset from the metadata"""


class AssetMetadataError(Exception):
"""
Raised when metadata is unable to be retrieved due to it not existing,
being improperly formatted in the request, or not supported
"""

def __init__(self, source: str, message: str) -> None:
super().__init__(message)

Expand All @@ -53,6 +63,8 @@ def __init__(self, source: str, message: str) -> None:

@final
class CachedAssetMetadataProvider(AssetMetadataProvider):
"""Provides metadata for assets that have been saved to the cache dir"""

def __init__(self, source: str, metadata: dict[str, dict[str, object]]) -> None:
self._source = source
self._metadata = metadata
Expand Down Expand Up @@ -85,8 +97,12 @@ class AssetMetadataSource(ABC):
@abstractmethod
def load(self) -> Iterator[AssetMetadataProvider]:
    """
    Loads the asset metadata made available by this source.

    :returns: An iterator over `AssetMetadataProvider` objects.
    """


class AssetSourceNotFoundError(Exception):
"""Raised if an `Asset` source is unable to be located or is not supported"""

def __init__(self, source: str) -> None:
super().__init__(f"{source} asset source is not found.")

Expand All @@ -97,6 +113,8 @@ class FileAssetMetadataLoader(ABC):
@abstractmethod
def load(self, path: Path) -> AssetMetadataProvider:
    """
    Loads `Asset` metadata from a `Path`.

    :param path: The path to load the metadata from.

    :returns: An `AssetMetadataProvider` for the loaded metadata.
    """


@final
class StandardFileAssetMetadataLoader(FileAssetMetadataLoader):
Expand Down Expand Up @@ -163,6 +181,8 @@ class PackageAssetMetadataLoader(ABC):
@abstractmethod
def load(self, package: str) -> AssetMetadataProvider:
    """
    Loads `Asset` metadata from a Python namespace package.

    :param package: The name of the package to load the metadata from.

    :returns: An `AssetMetadataProvider` for the loaded metadata.
    """


@final
class StandardPackageAssetMetadataLoader(PackageAssetMetadataLoader):
Expand Down Expand Up @@ -200,12 +220,18 @@ def load(self, package: str) -> AssetMetadataProvider:


class PackageFileLister(ABC):
    """Provides a way to list files stored within a package."""

    @abstractmethod
    def list(self, package: str, source: str) -> list[Path]:
        """
        Lists the files stored in a package.

        :param package: The name of the package whose files are listed.

        :param source: A string naming the asset source the listing is
            performed for (exact use is up to the implementation).

        :returns: The paths of the files found.
        """


@final
class StandardPackageFileLister(PackageFileLister):
"""Lists standard Python namespace package files"""

@override
def list(self, package: str, source: str) -> list[Path]:
files = []
Expand Down Expand Up @@ -239,6 +265,16 @@ def collect_files(p: MultiplexedPath | Path) -> None:
def load_in_memory_asset_metadata(
source: str, entries: Sequence[dict[str, object]]
) -> AssetMetadataProvider:
"""
Loads `Assets` named in metadata into memory

:raises AssetMetadataError: If the `Asset` `metadata` does not list a name,
the name is not of type ``str``, or the asset name already exists.

:raises AssetMetadataError: If the `Asset` `metadata` does not contain a
valid base name or if the base name is not of type ``str``.

"""
metadata = {}

for idx, asset_metadata in enumerate(entries):
Expand Down Expand Up @@ -285,11 +321,13 @@ def load_in_memory_asset_metadata(
class AssetMetadataFileLoader(ABC):
    """Loads asset metadata from an individual file."""

    @abstractmethod
    def load(self, file: Path, source: str) -> list[tuple[str, dict[str, object]]]:
        """
        Load asset metadata included in the single file ``file``.

        :param file: The path of the file to read.

        :param source: A string naming the asset source that ``file``
            belongs to (exact use is up to the implementation).

        :returns: A list of ``(str, dict)`` pairs, one per metadata entry
            (the string is presumably the asset name -- confirm against the
            implementations).
        """


@final
class YamlAssetMetadataFileLoader(AssetMetadataFileLoader):
"""Loader for `Asset` metadata stored in a YAML file."""

def __init__(self, yaml_loader: YamlLoader) -> None:
self._yaml_loader = yaml_loader

Expand Down Expand Up @@ -379,6 +417,8 @@ def sanitize_base_asset_name(name: str) -> str | None:

@final
class WellKnownAssetMetadataSource(AssetMetadataSource):
"""Represents metadata for an `Asset` already known to the library"""

def __init__(
self, dirs: AssetDirectoryAccessor, metadata_loader: FileAssetMetadataLoader
) -> None:
Expand Down Expand Up @@ -407,6 +447,8 @@ def load(self) -> Iterator[AssetMetadataProvider]:

@final
class FileAssetMetadataSource(AssetMetadataSource):
"""Represents `Asset` metadata from a single source file"""

def __init__(
self, path: Path, metadata_loader: FileAssetMetadataLoader, not_exist_ok: bool
) -> None:
Expand All @@ -429,6 +471,8 @@ def load(self) -> Iterator[AssetMetadataProvider]:

@final
class PackageAssetMetadataSource(AssetMetadataSource):
"""Represents `Asset` metadata from a Python namespace package"""

def __init__(
self, package: str, metadata_loader: PackageAssetMetadataLoader
) -> None:
Expand All @@ -445,6 +489,8 @@ def load(self) -> Iterator[AssetMetadataProvider]:

@final
class InMemoryAssetMetadataSource(AssetMetadataSource):
"""Represents metadata from an `Asset` in memory"""

def __init__(self, name: str, entries: Sequence[dict[str, object]]) -> None:
self._name = name
self._entries = entries
Expand Down
Loading
Loading