From 43c66d31e244b9c5b17c365f49cdd0bf7d61036c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Mon, 18 May 2026 13:13:35 -0300 Subject: [PATCH 1/2] fix(docker): simplify docker installation & some fixes --- .github/workflows/python-package.yml | 56 ++++++++++++++++- README.md | 24 +++++++- docker/Dockerfile | 47 +++++---------- docker/docker-compose.yaml | 4 +- docker/notebooks/Welcome.ipynb | 89 ++++++++++++++++++++++++++++ docker/scripts/entrypoint.sh | 21 ++++++- docker/scripts/poetry-install.sh | 2 +- pysus/api/_impl/databases.py | 20 ++++++- pysus/api/client.py | 13 ++-- pysus/tests/api/ftp/test_models.py | 4 +- 10 files changed, 235 insertions(+), 45 deletions(-) create mode 100644 docker/notebooks/Welcome.ipynb diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5a0ee038..e68d3182 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -48,13 +48,67 @@ jobs: if: matrix.os == 'ubuntu-latest' run: pre-commit run --files pysus/**/* - - name: Tests + - name: Tests (Linux) + if: matrix.os != 'windows-latest' run: | poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 --cov=pysus --cov-report=xml:coverage.xml --cov-report=term-missing + - name: Tests (Windows) + if: matrix.os == 'windows-latest' + shell: bash -l {0} + run: | + for i in 1 2 3; do + echo "Attempt $i..." + poetry run python -m pytest -vv pysus/tests/ --retries 3 --retry-delay 15 -p no:cacheprovider & + PID=$! + for j in $(seq 1 480); do + if ! kill -0 $PID 2>/dev/null; then + wait $PID + EXIT_CODE=$? 
+ if [ $EXIT_CODE -eq 0 ]; then + exit 0 + fi + break + fi + sleep 1 + done + kill $PID 2>/dev/null || true + echo "Attempt $i failed" + done + exit 1 + - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' uses: codecov/codecov-action@v5 with: files: ./coverage.xml fail_ci_if_error: false + + docker-tests: + runs-on: ubuntu-latest + timeout-minutes: 15 + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v4 + + - name: Build Docker image + run: docker compose -f docker/docker-compose.yaml build + + - name: Start container + run: docker compose -f docker/docker-compose.yaml up -d + + - name: Wait for Jupyter + run: | + for i in $(seq 1 10); do + curl -s -o /dev/null http://127.0.0.1:8888 && break + sleep 2 + done + + - name: Run tests inside container + run: docker compose -f docker/docker-compose.yaml exec -T -w /usr/src jupyter python3 -m pytest -vv pysus/tests/ --retries 3 --retry-delay 15 -x -o cache_dir=/tmp/.pytest_cache + + - name: Cleanup + if: always() + run: docker compose -f docker/docker-compose.yaml down -v diff --git a/README.md b/README.md index 11a5ba38..356f9c95 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,22 @@ For the terminal user interface (TUI): pip install pysus[tui] ``` +### Docker + +Build and start a JupyterLab container with PySUS pre-installed: + +```bash +docker compose -f docker/docker-compose.yaml up --build +``` + +Then open [http://127.0.0.1:8888/lab](http://127.0.0.1:8888/lab) in your browser. 
+ +Stop the container: + +```bash +docker compose -f docker/docker-compose.yaml down +``` + ## Quick Start ### Simplified Database Functions (New in 2.0) @@ -41,7 +57,7 @@ The easiest way to get data as a pandas DataFrame: ```python from pysus import sinan, sinasc, sim, sih, sia, pni, ibge, cnes, ciha -# Download SINAN Dengue data for 2024 +# Download SINAN Dengue data for 2000 df = sinan(disease="deng", year=2000) # Multiple years @@ -238,6 +254,12 @@ Run tests: pytest tests/ ``` +Run tests inside the Docker container: + +```bash +docker compose -f docker/docker-compose.yaml exec -T -w /usr/src jupyter python3 -m pytest pysus/tests/ +``` + ## License GPL diff --git a/docker/Dockerfile b/docker/Dockerfile index 82694393..74af492d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,21 +1,15 @@ -FROM condaforge/mambaforge +FROM python:3.12-slim -LABEL maintainer="es.loch@gmail.com" - -USER root +LABEL maintainer="luabidaa@gmail.com" ENV DEBIAN_FRONTEND=noninteractive - ENV HOME "/home/pysus" ENV PATH "$PATH:/home/pysus/.local/bin" -ENV ENV_NAME pysus -ENV PATH "/opt/conda/envs/$ENV_NAME/bin:$PATH" -ENV PATH "/opt/poetry/bin:$PATH" RUN apt-get -qq update --yes \ && apt-get -qq install --yes --no-install-recommends \ + libffi-dev \ build-essential \ - firefox \ ca-certificates \ sudo \ curl \ @@ -23,30 +17,21 @@ RUN apt-get -qq update --yes \ RUN useradd -ms /bin/bash pysus \ && echo "pysus ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/pysus \ - && chmod 0440 /etc/sudoers.d/ \ - && echo 'source /opt/conda/bin/activate "$ENV_NAME" && exec "$@"' > /activate.sh \ - && echo 'source activate "$ENV_NAME"' > /home/pysus/.bashrc \ - && chmod +x /activate.sh \ - && chmod -R a+rwx /opt/conda /tmp \ - && sudo chown -R pysus:pysus /usr/src - -USER pysus + && chmod 0440 /etc/sudoers.d/pysus \ + && mkdir -p /home/pysus/Notebooks \ + && chown -R pysus:pysus /home/pysus -RUN mkdir -p /home/pysus/Notebooks/ +COPY pyproject.toml poetry.lock LICENSE README.md /usr/src/ +COPY pysus 
/usr/src/pysus +COPY docker/scripts/entrypoint.sh /entrypoint.sh +COPY docker/notebooks/ /home/pysus/Notebooks/ -COPY --chown=pysus:pysus conda/dev.yaml /tmp/dev.yaml -COPY --chown=pysus:pysus docker/scripts/entrypoint.sh /entrypoint.sh -COPY --chown=pysus:pysus docker/scripts/poetry-install.sh /tmp/poetry-install.sh -COPY --chown=pysus:pysus pyproject.toml poetry.lock LICENSE README.md /usr/src/ -COPY --chown=pysus:pysus pysus /usr/src/pysus -COPY --chown=pysus:pysus docs/source/**/*.ipynb /home/pysus/Notebooks/ -COPY --chown=pysus:pysus docs/source/data /home/pysus/Notebooks/ - -RUN mamba env create -n $ENV_NAME --file /tmp/dev.yaml \ - && mamba clean -afy - -RUN cd /usr/src/ && bash /tmp/poetry-install.sh +RUN pip install poetry \ + && cd /usr/src && poetry config virtualenvs.create false && poetry install --with docs --extras dbc \ + && pip install 'httpx<0.28' \ + && chown -R pysus:pysus /home/pysus +USER pysus WORKDIR /home/pysus/Notebooks -ENTRYPOINT ["bash", "/activate.sh", "jupyter", "notebook", "--port=8888", "--ip=0.0.0.0"] +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 36965c91..a854abe4 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -1,4 +1,3 @@ -version: '3.3' services: jupyter: build: @@ -6,6 +5,8 @@ services: dockerfile: docker/Dockerfile hostname: pysus-jupyter container_name: pysus-jupyter + ports: + - "8888:8888" privileged: true environment: - DISPLAY=:0 @@ -13,4 +14,3 @@ services: volumes: - /tmp/.X11-unix:/tmp/.X11-unix entrypoint: ["/entrypoint.sh"] - command: ["/usr/bin/firefox"] diff --git a/docker/notebooks/Welcome.ipynb b/docker/notebooks/Welcome.ipynb new file mode 100644 index 00000000..c1d645a3 --- /dev/null +++ b/docker/notebooks/Welcome.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Welcome to PySUS\n", + "\n", + "PySUS provides tools for dealing with Brazil's public health data (SINAN, 
SINASC, SIM, SIH, SIA, PNI, IBGE, CNES, CIHA).\n", + "\n", + "## Quick start\n", + "\n", + "List available datasets and files:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pysus\n", + "\n", + "pysus.list_files()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fetch SINAN data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pysus import sinan\n", + "\n", + "df = sinan(\"deng\", year=2023)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fetch SINASC data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pysus import sinasc\n", + "\n", + "df = sinasc(state=\"RJ\", year=2023)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Documentation\n", + "\n", + "- [PySUS GitHub](https://github.com/AlertaDengue/PySUS)\n", + "- [PySUS Docs](https://pysus.readthedocs.io/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh index 3803b594..c43055fe 100755 --- a/docker/scripts/entrypoint.sh +++ b/docker/scripts/entrypoint.sh @@ -7,4 +7,23 @@ if [ -z "$jupyter_lab_path" ]; then exit 1 fi -$jupyter_lab_path lab --browser='firefox' --allow-root --NotebookApp.token='' --NotebookApp.password='' +rm -rf \ + /home/pysus/.local/share/jupyter/runtime \ + /home/pysus/.local/share/jupyter/notebook_secret \ + /home/pysus/.jupyter/lab/workspaces + +mkdir -p /home/pysus/.jupyter + +cat > /home/pysus/.jupyter/jupyter_server_config.py << 'EOF' +import logging + 
+logging.getLogger("jupyter_server.services.kernels").setLevel(logging.ERROR) +EOF + +$jupyter_lab_path lab --ip=0.0.0.0 --ServerApp.open_browser=False --ServerApp.default_url=/lab/tree/Welcome.ipynb --NotebookApp.token='' --NotebookApp.password='' & + +sleep 3 + +echo "Open http://127.0.0.1:8888/lab in your browser" + +wait diff --git a/docker/scripts/poetry-install.sh b/docker/scripts/poetry-install.sh index 8f82276a..0499a555 100644 --- a/docker/scripts/poetry-install.sh +++ b/docker/scripts/poetry-install.sh @@ -3,4 +3,4 @@ set -ex poetry config virtualenvs.create false -poetry install --without geo +poetry install --with docs diff --git a/pysus/api/_impl/databases.py b/pysus/api/_impl/databases.py index e36b2a11..9f9078d4 100644 --- a/pysus/api/_impl/databases.py +++ b/pysus/api/_impl/databases.py @@ -84,7 +84,25 @@ async def _fetch(): else pd.DataFrame() ) - return asyncio.run(_fetch()) + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop and loop.is_running(): + try: + import nest_asyncio # noqa: PLC0415 + + nest_asyncio.apply() + except ImportError: + msg = ( + "nest_asyncio is required when running inside Jupyter. 
" + "Install it with: pip install nest_asyncio" + ) + raise RuntimeError(msg) from None + return loop.run_until_complete(_fetch()) + else: + return asyncio.run(_fetch()) def sinan( diff --git a/pysus/api/client.py b/pysus/api/client.py index f2cf0913..7dd06c92 100644 --- a/pysus/api/client.py +++ b/pysus/api/client.py @@ -508,11 +508,14 @@ def get_columns(path: Path) -> set[tuple[str, str]]: if not geocode_cols: return base - duckdb.create_function( - "__pysus_add_dv", - _add_dv_fn, - null_handling="special", - ) + try: + duckdb.create_function( + "__pysus_add_dv", + _add_dv_fn, + null_handling="special", + ) + except duckdb.NotImplementedException: + pass selects = [ ( f'__pysus_add_dv("{c[0]}") AS "{c[0]}"' diff --git a/pysus/tests/api/ftp/test_models.py b/pysus/tests/api/ftp/test_models.py index 2dbc0176..64bb46e5 100644 --- a/pysus/tests/api/ftp/test_models.py +++ b/pysus/tests/api/ftp/test_models.py @@ -71,8 +71,8 @@ async def test_directory_load(mock_client, mock_dataset): assert len(content) == 2 assert isinstance(content[0], Directory) assert isinstance(content[1], File) - assert str(content[0].path) == "/root/subdir" - assert str(content[1].path) == "/root/file.dbc" + assert Path(content[0].path) == Path("/root/subdir") + assert Path(content[1].path) == Path("/root/file.dbc") @pytest.mark.asyncio From b8319d8d1429f488a093c209b880d85ef30ee842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Mon, 18 May 2026 15:55:58 -0300 Subject: [PATCH 2/2] chore: include docker hub publish --- .github/workflows/python-package.yml | 28 +++------------ .github/workflows/release.yaml | 53 +++++++++++++++++++++++----- README.md | 9 ++++- docker/Dockerfile | 2 ++ pysus/api/extensions.py | 10 ++---- 5 files changed, 63 insertions(+), 39 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e68d3182..d6e8f177 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml 
@@ -7,10 +7,6 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: ${{ (matrix.os == 'windows-latest' && 30) || 15 }} - defaults: - run: - shell: bash -l {0} - strategy: matrix: os: [ubuntu-latest, windows-latest] @@ -33,6 +29,7 @@ jobs: conda-solver: libmamba - name: Install dependencies + shell: bash -l {0} run: | pip install poetry poetry-plugin-export poetry config virtualenvs.create false @@ -46,10 +43,12 @@ jobs: - name: Linting if: matrix.os == 'ubuntu-latest' + shell: bash -l {0} run: pre-commit run --files pysus/**/* - name: Tests (Linux) if: matrix.os != 'windows-latest' + shell: bash -l {0} run: | poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 --cov=pysus --cov-report=xml:coverage.xml --cov-report=term-missing @@ -57,25 +56,8 @@ jobs: if: matrix.os == 'windows-latest' shell: bash -l {0} run: | - for i in 1 2 3; do - echo "Attempt $i..." - poetry run python -m pytest -vv pysus/tests/ --retries 3 --retry-delay 15 -p no:cacheprovider & - PID=$! - for j in $(seq 1 480); do - if ! kill -0 $PID 2>/dev/null; then - wait $PID - EXIT_CODE=$? 
- if [ $EXIT_CODE -eq 0 ]; then - exit 0 - fi - break - fi - sleep 1 - done - kill $PID 2>/dev/null || true - echo "Attempt $i failed" - done - exit 1 + export DUCKDB_NO_THREADS=1 + poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 --timeout=480 -p no:cacheprovider -p no:asyncio - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 92158300..68c77178 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -2,23 +2,27 @@ name: release on: workflow_dispatch: + inputs: + version: + description: "Version tag for Docker image (e.g., 9.9.9-test)" + required: false + type: string push: - branches: [ main ] + branches: [main] + tags: + - "[0-9]*" pull_request: - branches: [ main ] + branches: [main] concurrency: - group: ci-${{ github.ref }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} jobs: build: + if: ${{ !startsWith(github.ref, 'refs/tags/') }} runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} - steps: - uses: actions/checkout@v4 @@ -44,7 +48,40 @@ jobs: poetry config pypi-token.pypi ${PYPI_TOKEN} make release + docker-push: + if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: alertadengue + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Extract version + id: version + run: | + if [ -n "${{ inputs.version }}" ]; then + echo "tag=${{ inputs.version }}" >> "$GITHUB_OUTPUT" + else + echo "tag=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT" + fi + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . 
+ file: docker/Dockerfile + push: true + tags: | + alertadengue/pysus:latest + alertadengue/pysus:${{ steps.version.outputs.tag }} + docs: + if: ${{ !startsWith(github.ref, 'refs/tags/') }} runs-on: ubuntu-latest defaults: diff --git a/README.md b/README.md index 356f9c95..f491f43f 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,14 @@ pip install pysus[tui] ### Docker -Build and start a JupyterLab container with PySUS pre-installed: +A pre-built JupyterLab image is available on Docker Hub: + +```bash +docker pull alertadengue/pysus +docker run -p 8888:8888 alertadengue/pysus +``` + +Or build locally and start the container: ```bash docker compose -f docker/docker-compose.yaml up --build diff --git a/docker/Dockerfile b/docker/Dockerfile index 74af492d..00d3a3ab 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,8 @@ FROM python:3.12-slim LABEL maintainer="luabidaa@gmail.com" +LABEL org.opencontainers.image.source="https://github.com/AlertaDengue/PySUS" +LABEL org.opencontainers.image.description="PySUS - Data extraction from Brazilian public health systems" ENV DEBIAN_FRONTEND=noninteractive ENV HOME "/home/pysus" diff --git a/pysus/api/extensions.py b/pysus/api/extensions.py index 27746abd..6acb9c86 100644 --- a/pysus/api/extensions.py +++ b/pysus/api/extensions.py @@ -14,12 +14,6 @@ from typing import ClassVar import chardet - -try: - import magic -except (ImportError, OSError): - magic = None # type: ignore[assignment] - import pandas as pd import pyarrow as pa import pyarrow.parquet as pq @@ -837,7 +831,9 @@ class ExtensionFactory: @classmethod async def _identify(cls, path: Path) -> type[BaseLocalFile] | None: """Identify the file class by its MIME type.""" - if magic is None: + try: + import magic + except (ImportError, OSError): return None try: mime = await to_thread.run_sync(