Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ baselines: llm-baselines naive-and-dummy-forecasters
manifold: manifold-fetch manifold-update-questions

manifold-fetch:
$(MAKE) -C src/questions/manifold/fetch || echo "* $@" >> $(MAKE_FAILURE_LOG)
$(MAKE) -C src/orchestration/func_manifold_fetch || echo "* $@" >> $(MAKE_FAILURE_LOG)

manifold-update-questions:
$(MAKE) -C src/questions/manifold/update_questions || echo "* $@" >> $(MAKE_FAILURE_LOG)
$(MAKE) -C src/orchestration/func_manifold_update || echo "* $@" >> $(MAKE_FAILURE_LOG)

metaculus: metaculus-fetch metaculus-update-questions

Expand Down
12 changes: 12 additions & 0 deletions src/_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,18 @@ class InferFetchFrame(QuestionFrame):
nullify_question: Series[bool]


class ManifoldFetchFrame(pa.DataFrameModel):
    """Schema for the frame returned by ManifoldSource.fetch().

    The frame carries nothing but market IDs taken from the search-markets
    endpoint.
    """

    # Market identifier, declared as a string column.
    id: Series[str]

    class Config:
        """Schema configuration."""

        # Cast columns to their declared dtype during validation.
        coerce = True
        # Do not reject columns that are not declared on this model.
        strict = False


class AcledResolutionFrame(pa.DataFrameModel):
"""ACLED-specific: aggregated events by country and date.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,38 @@ UPLOAD_DIR = upload
.gcloudignore:
cp -r $(ROOT_DIR)src/helpers/.gcloudignore .

Procfile:
cp -r $(ROOT_DIR)src/helpers/Procfile .
Dockerfile: $(ROOT_DIR)src/helpers/Dockerfile.template
sed \
-e 's/REGION/$(CLOUD_DEPLOY_REGION)/g' \
-e 's/STACK/google-22-full/g' \
-e 's/PYTHON_VERSION/python312/g' \
$< > Dockerfile

deploy : main.py .gcloudignore requirements.txt Procfile
deploy : .gcloudignore requirements.txt Dockerfile
mkdir -p $(UPLOAD_DIR)
cp -r $(ROOT_DIR)utils $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/helpers
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/sources
mkdir -p $(UPLOAD_DIR)/orchestration
cp $(ROOT_DIR)src/orchestration/__init__.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_io.py $(UPLOAD_DIR)/orchestration/
cp $^ $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/orchestration/_source_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp main.py $(UPLOAD_DIR)/main.py
cp requirements.txt $(UPLOAD_DIR)/requirements.txt
cp Dockerfile $(UPLOAD_DIR)/
gcloud run jobs deploy \
func-data-manifold-fetch \
--project $(CLOUD_PROJECT) \
--region $(CLOUD_DEPLOY_REGION) \
--tasks 1 \
--parallelism 1 \
--task-timeout 30s \
--task-timeout 560s \
--memory 512Mi \
--max-retries 0 \
--service-account $(QUESTION_BANK_BUCKET_SERVICE_ACCOUNT) \
--set-env-vars $(DEFAULT_CLOUD_FUNCTION_ENV_VARS) \
--source $(UPLOAD_DIR)

clean :
rm -rf $(UPLOAD_DIR) .gcloudignore Procfile
rm -rf $(UPLOAD_DIR) .gcloudignore Dockerfile
30 changes: 30 additions & 0 deletions src/orchestration/func_manifold_fetch/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Manifold fetch entry point."""

from __future__ import annotations

import logging
from typing import Any

from helpers import decorator
from orchestration import _source_io
from sources.manifold import ManifoldSource

# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Source name that driver() passes to _source_io.write_fetch_output().
SOURCE = "manifold"


@decorator.log_runtime
def driver(_: Any) -> None:
    """Run the Manifold fetch step: fetch market IDs and upload them.

    A ``ManifoldSource`` is constructed, its ``fetch()`` result is handed to
    ``_source_io.write_fetch_output`` under the ``SOURCE`` name, and a final
    "Done." message is logged.

    Args:
        _: Ignored; the body never reads it.
    """
    fetched_frame = ManifoldSource().fetch()
    _source_io.write_fetch_output(SOURCE, fetched_frame)
    logger.info("Done.")


# When executed as a script, run the driver directly. Its single argument is
# never read, so None is passed as a placeholder.
if __name__ == "__main__":
    driver(None)
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
google-cloud-storage
google-cloud-secret-manager
pandas>=2.2.2,<3.0
pyarrow
backoff
certifi
pandera
termcolor
requests
certifi
backoff
numpy
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,26 @@ UPLOAD_DIR = upload
.gcloudignore:
cp -r $(ROOT_DIR)src/helpers/.gcloudignore .

Procfile:
cp -r $(ROOT_DIR)src/helpers/Procfile .
Dockerfile: $(ROOT_DIR)src/helpers/Dockerfile.template
sed \
-e 's/REGION/$(CLOUD_DEPLOY_REGION)/g' \
-e 's/STACK/google-22-full/g' \
-e 's/PYTHON_VERSION/python312/g' \
$< > Dockerfile

deploy : main.py .gcloudignore requirements.txt Procfile
deploy : .gcloudignore requirements.txt Dockerfile
mkdir -p $(UPLOAD_DIR)
cp -r $(ROOT_DIR)utils $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/helpers
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/sources
mkdir -p $(UPLOAD_DIR)/orchestration
cp $(ROOT_DIR)src/orchestration/__init__.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_io.py $(UPLOAD_DIR)/orchestration/
cp $^ $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/orchestration/_source_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp main.py $(UPLOAD_DIR)/main.py
cp requirements.txt $(UPLOAD_DIR)/requirements.txt
cp Dockerfile $(UPLOAD_DIR)/
gcloud run jobs deploy \
func-data-manifold-update-questions \
--project $(CLOUD_PROJECT) \
Expand All @@ -37,4 +43,4 @@ deploy : main.py .gcloudignore requirements.txt Procfile
--source $(UPLOAD_DIR)

clean :
rm -rf $(UPLOAD_DIR) .gcloudignore Procfile
rm -rf $(UPLOAD_DIR) .gcloudignore Dockerfile
55 changes: 55 additions & 0 deletions src/orchestration/func_manifold_update/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Manifold update entry point."""

from __future__ import annotations

import logging
from typing import Any

from helpers import data_utils, decorator, env
from orchestration import _source_io
from sources.manifold import ManifoldSource
from utils import gcp

# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Source name that driver() passes to the I/O helpers and uses as the
# storage listing prefix.
SOURCE = "manifold"


@decorator.log_runtime
def driver(_: Any) -> None:
    """Refresh the Manifold questions and their resolution files.

    Steps, in order:
      1. Read the question and fetch data with
         ``data_utils.get_data_from_cloud_storage``.
      2. Load the existing resolution files for ``SOURCE``.
      3. List what is stored in ``env.QUESTION_BANK_BUCKET`` under the
         ``SOURCE`` prefix.
      4. Pass all of the above to ``ManifoldSource.update``.
      5. Upload the resulting questions, and the resolution files when that
         collection is non-empty.

    Args:
        _: Ignored; the body never reads it.
    """
    manifold = ManifoldSource()

    questions, fetched = data_utils.get_data_from_cloud_storage(
        SOURCE, return_question_data=True, return_fetch_data=True
    )

    logger.info("Loading existing resolution files...")
    # Called without an ids= filter so that ALL existing resolution files are
    # loaded, matching the old, non-refactored behaviour.
    # TODO: we can drop pre-benchmark start date history/ids.
    existing_resolution_files = _source_io.load_existing_resolution_files(SOURCE)
    logger.info(f"Loaded {len(existing_resolution_files)} resolution files")

    stored_files = gcp.storage.list_with_prefix(
        bucket_name=env.QUESTION_BANK_BUCKET, prefix=SOURCE
    )

    outcome = manifold.update(
        questions,
        fetched,
        existing_resolution_files=existing_resolution_files,
        files_in_storage=stored_files,
    )

    logger.info("Uploading to GCP...")
    data_utils.upload_questions(outcome.dfq, SOURCE)
    # Skip the resolution-file upload when update() produced none.
    if outcome.resolution_files:
        _source_io.upload_resolution_files(SOURCE, outcome.resolution_files)

    logger.info("Done.")


# When executed as a script, run the driver directly. Its single argument is
# never read, so None is passed as a placeholder.
if __name__ == "__main__":
    driver(None)
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
google-cloud-storage
google-cloud-secret-manager
pandas>=2.2.2,<3.0
pyarrow
backoff
certifi
pandera
termcolor
requests
certifi
backoff
numpy
3 changes: 0 additions & 3 deletions src/questions/manifold/.gcloudignore

This file was deleted.

145 changes: 0 additions & 145 deletions src/questions/manifold/fetch/main.py

This file was deleted.

Loading
Loading