Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,12 @@ name = "testpypi"
url = "https://test.pypi.org/simple/"
publish-url = "https://test.pypi.org/legacy/"
explicit = true

[tool.mypy]
# Docs: https://mypy.readthedocs.io/en/stable/config_file.html#using-a-pyproject-toml-file
python_version = "3.10"
warn_unused_configs = true
warn_return_any = false
namespace_packages = false
files = ["src/tsdata"]
exclude = "test"
1 change: 1 addition & 0 deletions src/tsdata/_loader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Implementing loading for datasets."""
26 changes: 26 additions & 0 deletions src/tsdata/_loader/as_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Load as a pandas dataframe."""

from pathlib import Path

import pandas as pd

from .base import FileDatasetCollection


class PandasRawDatasetCollection(FileDatasetCollection[pd.DataFrame]):
"""Load files from CSV or Parquet files, without post-processing."""

@classmethod
def supported_file_types(cls) -> set[str]:
"""File types supported by this Pandas type."""
return {"csv", "parquet"}

def load_file(self, path: Path) -> pd.DataFrame:
"""Load a file."""
match path.suffix:
case ".csv":
return pd.read_csv(path, index_col=False, header="infer")
case ".parquet":
return pd.read_parquet(path)
case _:
raise ValueError(f"Unsupported file type: {path.suffix}")
77 changes: 77 additions & 0 deletions src/tsdata/_loader/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Time series data loader."""

from abc import abstractmethod
from collections.abc import Iterator, Mapping
from itertools import chain
from pathlib import Path
from typing import Generic, TypeVar

T = TypeVar("T")


class DatasetCollection(Mapping[str, T]):
    """Read-only mapping from a dataset name to the dataset itself.

    Subclasses decide how a value of type ``T`` is produced; the intent is
    that a dataset is only loaded when its name is looked up.
    """

    @abstractmethod
    def __iter__(self) -> Iterator[str]:
        """Iterate over the names of the available datasets."""
        ...

    @abstractmethod
    def __len__(self) -> int:
        """Return how many datasets are available."""
        ...

    @abstractmethod
    def __getitem__(self, key: str) -> T:
        """Load and return the dataset registered under ``key``."""
        ...


class FileDatasetCollection(DatasetCollection[T], Generic[T]):
    """Dataset collection backed by the files of a single directory.

    Every entry directly inside the directory whose extension is listed by
    ``supported_file_types`` is one dataset; its key is the file name without
    the extension.  A file is only read when its key is looked up.
    """

    # To override

    @classmethod
    @abstractmethod
    def supported_file_types(cls) -> set[str]:
        """Return the file extensions (without the leading dot) to pick up."""

    @abstractmethod
    def load_file(self, path: Path) -> T:
        """Load the file at ``path`` and return it as the collection's value type."""

    # Ready parts

    def __init__(self, dir: Path | str) -> None:
        """Create the collection for the given directory.

        The directory is stored as a resolved, absolute path in ``self.dir``.
        """
        # NOTE: the parameter name shadows the builtin ``dir`` but is part of
        # the public signature (it may be passed by keyword), so it stays.
        self.dir = Path(dir).resolve().absolute()

    def __repr__(self) -> str:
        """Return a string that recreates this collection when evaluated."""
        cn = type(self).__qualname__
        d = str(self.dir)
        return f"{cn}({d!r})"

    @property
    def files(self) -> dict[str, Path]:
        """Map each dataset name (file stem) to its path.

        The directory is scanned on every access, so newly added files are
        picked up.  Paths are sorted before the mapping is built: neither the
        iteration order of the extension set nor the order in which ``glob``
        yields entries is guaranteed, and without sorting both the key order
        and the winner among files sharing a stem could differ between runs.
        With sorting, the path that sorts last wins for a duplicated stem.
        """
        matches = (
            self.dir.glob(f"*.{ft}") for ft in self.supported_file_types()
        )
        return {path.stem: path for path in sorted(chain.from_iterable(matches))}

    # Implement mapping interface

    def __iter__(self) -> Iterator[str]:
        """Iterate over the dataset names."""
        return iter(self.files)

    def __len__(self) -> int:
        """Return the number of available datasets."""
        return len(self.files)

    def __contains__(self, key: object) -> bool:
        """Report whether a dataset named ``key`` exists, without loading it.

        The ``Mapping`` default answers membership by calling
        ``__getitem__``, which here would read the whole file from disk.
        """
        return key in self.files

    def __getitem__(self, key: str) -> T:
        """Load and return the dataset named ``key``.

        Raises
        ------
        KeyError
            If no supported file with that stem exists in the directory.
        """
        filepath = self.files[key]
        return self.load_file(filepath)
File renamed without changes.
File renamed without changes.
25 changes: 25 additions & 0 deletions src/tsdata/fpp3/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Loads data exported from Forecasting: Principles and Practice, 3rd Edition.

This module is automatically generated from available CSV files.

See Also
--------
https://otexts.com/fpp3/
"""

from pathlib import Path

import pandas as pd

from tsdata._loader.as_pandas import PandasRawDatasetCollection

# Collection over the data files located in the same directory as this
# module; datasets are returned as loaded by PandasRawDatasetCollection.
raw = PandasRawDatasetCollection(dir=Path(__file__).parent)


def __getattr__(name: str) -> pd.DataFrame:
    """Resolve ``<module>.<dataset name>`` to the loaded dataset.

    This is the module-level attribute hook: Python calls it only after the
    normal module attribute lookup has failed.

    Raises
    ------
    AttributeError
        If ``name`` is neither a dataset in ``raw`` nor a module global.
    """
    # Look the dataset up directly instead of testing membership first: a
    # membership test on the mapping can itself fall back to ``raw[name]``,
    # which would read the file from disk a second time.
    try:
        return raw[name]
    except KeyError:
        pass
    # Kept for callers that invoke this function directly with a global name.
    try:
        return globals()[name]
    except KeyError:
        # ``from None`` hides the internal KeyError from the traceback.
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}"
        ) from None
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
145 changes: 145 additions & 0 deletions src/tsdata/fpppy/AirPassengers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
ds,y
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121
1949-06-01,135
1949-07-01,148
1949-08-01,148
1949-09-01,136
1949-10-01,119
1949-11-01,104
1949-12-01,118
1950-01-01,115
1950-02-01,126
1950-03-01,141
1950-04-01,135
1950-05-01,125
1950-06-01,149
1950-07-01,170
1950-08-01,170
1950-09-01,158
1950-10-01,133
1950-11-01,114
1950-12-01,140
1951-01-01,145
1951-02-01,150
1951-03-01,178
1951-04-01,163
1951-05-01,172
1951-06-01,178
1951-07-01,199
1951-08-01,199
1951-09-01,184
1951-10-01,162
1951-11-01,146
1951-12-01,166
1952-01-01,171
1952-02-01,180
1952-03-01,193
1952-04-01,181
1952-05-01,183
1952-06-01,218
1952-07-01,230
1952-08-01,242
1952-09-01,209
1952-10-01,191
1952-11-01,172
1952-12-01,194
1953-01-01,196
1953-02-01,196
1953-03-01,236
1953-04-01,235
1953-05-01,229
1953-06-01,243
1953-07-01,264
1953-08-01,272
1953-09-01,237
1953-10-01,211
1953-11-01,180
1953-12-01,201
1954-01-01,204
1954-02-01,188
1954-03-01,235
1954-04-01,227
1954-05-01,234
1954-06-01,264
1954-07-01,302
1954-08-01,293
1954-09-01,259
1954-10-01,229
1954-11-01,203
1954-12-01,229
1955-01-01,242
1955-02-01,233
1955-03-01,267
1955-04-01,269
1955-05-01,270
1955-06-01,315
1955-07-01,364
1955-08-01,347
1955-09-01,312
1955-10-01,274
1955-11-01,237
1955-12-01,278
1956-01-01,284
1956-02-01,277
1956-03-01,317
1956-04-01,313
1956-05-01,318
1956-06-01,374
1956-07-01,413
1956-08-01,405
1956-09-01,355
1956-10-01,306
1956-11-01,271
1956-12-01,306
1957-01-01,315
1957-02-01,301
1957-03-01,356
1957-04-01,348
1957-05-01,355
1957-06-01,422
1957-07-01,465
1957-08-01,467
1957-09-01,404
1957-10-01,347
1957-11-01,305
1957-12-01,336
1958-01-01,340
1958-02-01,318
1958-03-01,362
1958-04-01,348
1958-05-01,363
1958-06-01,435
1958-07-01,491
1958-08-01,505
1958-09-01,404
1958-10-01,359
1958-11-01,310
1958-12-01,337
1959-01-01,360
1959-02-01,342
1959-03-01,406
1959-04-01,396
1959-05-01,420
1959-06-01,472
1959-07-01,548
1959-08-01,559
1959-09-01,463
1959-10-01,407
1959-11-01,362
1959-12-01,405
1960-01-01,417
1960-02-01,391
1960-03-01,419
1960-04-01,461
1960-05-01,472
1960-06-01,535
1960-07-01,622
1960-08-01,606
1960-09-01,508
1960-10-01,461
1960-11-01,390
1960-12-01,432
Loading