Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CLIENT_ORIGINS=https://fake-origin.example.com
CLIENT_ORIGINS_REGEX="^http://fake-localhost:.*"

ENV=development
##### MISTRAL AZURE #####
AZURE_MISTRAL_SWEDEN_API_BASE=https://fake-mistral-sweden.example.com/models
AZURE_MISTRAL_SWEDEN_API_KEY=FAKE_MISTRAL_SWEDEN_API_KEY
Expand Down
1 change: 1 addition & 0 deletions k8s/welearn-api/values.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ config:
PG_HOST: dev-lab-projects-backend.postgres.database.azure.com
TIKA_URL_BASE: https://tika.k8s.lp-i.dev/
DATA_COLLECTION_ORIGIN_PREFIX: welearn
ENV: development
allowedHostsRegexes:
mainUrl: |-
https:\/\/welearn\.k8s\.lp-i\.dev
Expand Down
1 change: 1 addition & 0 deletions k8s/welearn-api/values.prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ config:
PG_HOST: prod-prod-projects-backend.postgres.database.azure.com
TIKA_URL_BASE: https://tika.k8s.lp-i.org/
DATA_COLLECTION_ORIGIN_PREFIX: workshop
ENV: production
allowedHostsRegexes:
alphaUrls: |-
https://[a-zA-Z0-9-]*\.alpha-welearn\.lp-i\.org
Expand Down
1 change: 1 addition & 0 deletions k8s/welearn-api/values.staging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ config:
PG_DATABASE: welearn_datastack_staging
TIKA_URL_BASE: https://tika.k8s.lp-i.dev/
DATA_COLLECTION_ORIGIN_PREFIX: welearn
ENV: staging
allowedHostsRegexes:
mainUrl: |-
https:\/\/welearn\.k8s\.lp-i\.xyz
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ env =
TIKA_URL_BASE=https://tika.example.com
USE_CACHED_SETTINGS=True
DATA_COLLECTION_ORIGIN_PREFIX=workshop
ENV=test

filterwarnings =
ignore:.*U.*mode is deprecated:DeprecationWarning
5 changes: 3 additions & 2 deletions src/app/api/api_v1/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from fastapi import APIRouter

from src.app.api.api_v1.endpoints import chat, metric, micro_learning, user
from src.app.api.api_v1.endpoints import chat, metric, micro_learning
from src.app.search.api import router as search_router
from src.app.tutor.api import router as tutor_router
from src.app.user.api import router as user_router

api_router = APIRouter()
api_router.include_router(chat.router, prefix="/qna", tags=["qna"])
Expand All @@ -14,7 +15,7 @@
api_router.include_router(
micro_learning.router, prefix="/micro_learning", tags=["micro_learning"]
)
api_router.include_router(user.router, prefix="/user", tags=["user"])
api_router.include_router(user_router.router, prefix="/user", tags=["user"])


api_tags_metadata = [
Expand Down
5 changes: 3 additions & 2 deletions src/app/api/api_v1/endpoints/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
)
from src.app.shared.infra.abst_chat import get_chat_service
from src.app.shared.utils.dependencies import get_settings
from src.app.shared.utils.requests import extract_session_cookie
from src.app.utils.logger import logger as utils_logger

logger = utils_logger(__name__)
Expand Down Expand Up @@ -256,7 +257,7 @@ async def q_and_a_ans(
str: openai chat completion content
"""

session_id = request.headers.get("X-Session-ID")
session_id = extract_session_cookie(request)

try:
content = await chatfactory.chat_message(
Expand Down Expand Up @@ -354,7 +355,7 @@ async def agent_response(
data_collection=Depends(get_data_collection_service),
) -> Optional[Dict]:
try:
session_id = request.headers.get("X-Session-ID")
session_id = extract_session_cookie(request)
docs = []

if body.query is None:
Expand Down
3 changes: 2 additions & 1 deletion src/app/api/api_v1/endpoints/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from src.app.services.data_collection import get_data_collection_service
from src.app.services.sql_db.queries import get_document_qty_table_info_sync
from src.app.shared.utils.dependencies import get_settings
from src.app.shared.utils.requests import extract_session_cookie
from src.app.utils.logger import logger as utils_logger

logger = utils_logger(__name__)
Expand Down Expand Up @@ -55,6 +56,6 @@ async def update_clicked_doc_from_chat_message(
async def register_syllabus_download(
request: Request, data_collection=Depends(get_data_collection_service)
) -> str:
session_id = request.headers.get("X-Session-ID")
session_id = extract_session_cookie(request)
await data_collection.register_syllabus_download(session_id)
return "registered"
1 change: 1 addition & 0 deletions src/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def get_api_version(self) -> dict:
AZURE_API_VERSION: str

LLM_MODEL_NAME: str
ENV: str
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ENV is now a required Settings field. Because settings = Settings() is instantiated at import time, missing ENV will prevent the app from starting (ValidationError). If you want backward compatibility / safer local runs, consider giving ENV a default (e.g., "development") and/or validating it as a constrained enum.

Suggested change
ENV: str
ENV: str = "development"

Copilot uses AI. Check for mistakes.

# PG
PG_USER: Optional[str] = None
Expand Down
5 changes: 3 additions & 2 deletions src/app/middleware/monitor_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from starlette.middleware.base import BaseHTTPMiddleware

from src.app.services.sql_db.queries import register_endpoint
from src.app.shared.utils.requests import extract_session_cookie
from src.app.utils.logger import logger as logger_utils

logger = logger_utils(__name__)
Expand All @@ -11,7 +12,7 @@
class MonitorRequestsMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
if request.url.path.startswith("/api/v1/"):
session_id = request.headers.get("X-Session-ID")
session_id = extract_session_cookie(request)
if session_id:
try:
await run_in_threadpool(
Expand All @@ -24,7 +25,7 @@ async def dispatch(self, request: Request, call_next):
logger.error(f"Failed to register endpoint {request.url.path}: {e}")
else:
logger.warning(
f"No X-Session-ID header provided for {request.url.path}"
f"No X-Session-ID cookie provided for {request.url.path}"
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning message still refers to "X-Session-ID" even though the code now reads the session from a cookie (x-session-id). Consider updating the log message to reference the actual cookie name (ideally using SESSION_COOKIE_NAME) to avoid confusion during ops/debugging.

Suggested change
f"No X-Session-ID cookie provided for {request.url.path}"
f"No x-session-id cookie provided for {request.url.path}"

Copilot uses AI. Check for mistakes.
)

response = await call_next(request)
Expand Down
3 changes: 2 additions & 1 deletion src/app/search/api/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
ModelNotFoundError,
bad_request,
)
from src.app.shared.utils.requests import extract_session_cookie
from src.app.utils.logger import logger as logger_utils

router = APIRouter()
Expand Down Expand Up @@ -206,7 +207,7 @@ async def search_all(
data_collection=Depends(get_data_collection_service),
):
try:
session_id = request.headers.get("X-Session-ID")
session_id = extract_session_cookie(request)

res = await sp.search_handler(
qp=qp, method=SearchMethods.BY_DOCUMENT, background_tasks=background_tasks
Expand Down
12 changes: 4 additions & 8 deletions src/app/services/data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def get_campaign_state(

async def register_search_data(
self,
session_id: str | None,
session_id: uuid.UUID | None,
query: str,
search_results: list[Document | ScoredPoint],
sdg_filter: list[int] | None = None,
Expand All @@ -88,9 +88,7 @@ async def register_search_data(
},
)

user_id = await run_in_threadpool(
get_user_from_session_id, uuid.UUID(session_id)
)
user_id = await run_in_threadpool(get_user_from_session_id, session_id)

if not user_id:
raise HTTPException(
Expand Down Expand Up @@ -241,7 +239,7 @@ async def register_syllabus_update(

async def register_chat_data(
self,
session_id: str | None,
session_id: uuid.UUID | None,
user_query: str,
conversation_id: uuid.UUID | None,
answer_content: str,
Expand All @@ -264,9 +262,7 @@ async def register_chat_data(
},
)

user_id = await run_in_threadpool(
get_user_from_session_id, uuid.UUID(session_id)
)
user_id = await run_in_threadpool(get_user_from_session_id, session_id)

if not user_id:
raise HTTPException(
Expand Down
21 changes: 20 additions & 1 deletion src/app/services/sql_db/queries_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import uuid
from datetime import datetime, timedelta

from fastapi import HTTPException
from sqlalchemy.sql import select
from welearn_database.data.models import Bookmark, InferredUser, Session

Expand All @@ -27,6 +28,12 @@ def get_or_create_user_sync(
).first()
if user:
return user.id
else:
logger.error(f"User with id {user_id} not found,")
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error log message has a trailing comma ("not found,"). If this is meant to be a full sentence, remove the trailing comma to keep logs clean and consistent.

Suggested change
logger.error(f"User with id {user_id} not found,")
logger.error(f"User with id {user_id} not found")

Copilot uses AI. Check for mistakes.
raise HTTPException(
status_code=404, detail=f"User with id {user_id} not found"
)

user = InferredUser(origin_referrer=referer)
s.add(user)
s.commit()
Expand Down Expand Up @@ -70,11 +77,23 @@ def get_or_create_session_sync(
return new_session.id


def get_user_from_session_id(session_id: uuid.UUID) -> uuid.UUID | None:
def get_user_from_session_id(session_id: uuid.UUID | None) -> uuid.UUID | None:
if not session_id:
return None

with session_maker() as s:
session = s.execute(select(Session).where(Session.id == session_id)).first()
if session:
logger.info(
"Valid session. user_id=%s session_id=%s",
session[0].inferred_user_id,
session_id,
)
return session[0].inferred_user_id
Comment on lines 84 to 92
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_user_from_session_id currently treats any session row as valid and does not check Session.end_at > now (unlike get_or_create_session_sync). This can make expired sessions resolve to a user indefinitely, especially problematic now that cookies can persist long-term. Consider filtering by end_at in the query (and/or deleting/invalidating expired sessions).

Copilot uses AI. Check for mistakes.
else:
HTTPException(
status_code=404, detail=f"Session with id {session_id} not found"
)
Comment on lines +93 to +96
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_user_from_session_id instantiates an HTTPException in the else branch but never raises it, so the exception is effectively ignored and the function falls through returning None. Either raise the exception or remove it and just return None (and let callers decide how to handle missing sessions).

Suggested change
else:
HTTPException(
status_code=404, detail=f"Session with id {session_id} not found"
)

Copilot uses AI. Check for mistakes.
return None


Expand Down
4 changes: 4 additions & 0 deletions src/app/shared/domain/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@
}

APP_NAME = "welearn-api"


SESSION_COOKIE_NAME = "x-session-id"
SESSION_TTL_SECONDS = 60 * 60 * 24 * 400 # 400 days
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SESSION_TTL_SECONDS is set to ~400 days, but get_or_create_session_sync currently creates DB sessions with end_at = now + 24h. This mismatch will cause long-lived cookies to reference expired DB sessions (and can lead to frequent session re-creation / unexpected identity behavior). Consider aligning cookie TTL with DB session validity (or renaming this to a long-lived user-tracking cookie and adjusting the DB model accordingly).

Suggested change
SESSION_TTL_SECONDS = 60 * 60 * 24 * 400 # 400 days
SESSION_TTL_SECONDS = 60 * 60 * 24 # 1 day

Copilot uses AI. Check for mistakes.
25 changes: 25 additions & 0 deletions src/app/shared/utils/requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from typing import Optional
from uuid import UUID

from fastapi import Request

from src.app.shared.domain.constants import SESSION_COOKIE_NAME
from src.app.utils.logger import logger as utils_logger

logger = utils_logger(__name__)


def extract_session_cookie(request: Request) -> Optional[UUID]:
cookie_value = request.cookies.get(SESSION_COOKIE_NAME)
if not cookie_value:
return None

try:
return UUID(cookie_value)
except ValueError:
logger.warning("Invalid session cookie format: %s", cookie_value)
return None


def extract_origin_from_request(request: Request) -> str:
return request.headers.get("origin", "unknown")
6 changes: 3 additions & 3 deletions src/app/tests/api/api_v1/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ async def test_search_all_no_collections(self, *mocks):
headers={
"X-API-Key": "test",
"origin": "test.com",
"X-Session-ID": str(uuid.uuid4()),
"Cookie": f"x-session-id={str(uuid.uuid4())}",
},
)
self.assertEqual(response.status_code, 404)
Expand All @@ -349,7 +349,7 @@ async def test_search_all_no_result(self, *mocks):
headers={
"X-API-Key": "test",
"origin": "test.com",
"X-Session-ID": str(uuid.uuid4()),
"Cookie": f"x-session-id={str(uuid.uuid4())}",
}, # noqa: E501
)

Expand All @@ -363,7 +363,7 @@ async def test_search_all_no_query(self, *mocks):
headers={
"X-API-Key": "test",
"origin": "test.com",
"X-Session-Id": str(uuid.uuid4()),
"Cookie": f"x-session-id={str(uuid.uuid4())}",
},
)
self.assertEqual(response.status_code, 400)
Expand Down
35 changes: 21 additions & 14 deletions src/app/tests/api/api_v1/test_user.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
import uuid
from unittest import mock
from unittest.mock import MagicMock

Expand Down Expand Up @@ -231,30 +232,36 @@ async def test_get_user_bookmarks_success_empty(self, session_maker_mock, *mocks
session.execute.return_value.all.return_value = []
session_maker_mock.return_value.__enter__.return_value = session

user_id = "22222222-2222-2222-2222-222222222222"
response = client.get(
f"{settings.API_V1_STR}/user/:user_id/bookmarks",
params={"user_id": user_id},
f"{settings.API_V1_STR}/user/bookmarks",
headers={"X-API-Key": "test"},
cookies={"x-session-id": "bdb62bb2-1fe5-4d14-92fd-60a041355aea"},
)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(), {"bookmarks": []})

@mock.patch("src.app.services.sql_db.queries_user.session_maker")
async def test_add_user_bookmark_success(self, session_maker_mock, *mocks):
"""Ajout d'un bookmark"""
session = MagicMock()
session.execute.return_value.first.side_effect = [MagicMock(id="user-1"), None]
session_maker_mock.return_value.__enter__.return_value = session
@mock.patch("src.app.user.api.router.run_in_threadpool")
@mock.patch("src.app.user.api.router.resolve_user_and_session")
async def test_add_user_bookmark_success(
self, resolve_user_and_session_mock, run_in_threadpool_mock, *mocks
):
"""Ajout d'un bookmark - mocks only what is needed"""
# Mock resolve_user_and_session to return user_id and session_id
user_id = uuid.UUID("cfc8072c-a055-442a-9878-b5a73d9141b2")
session_id = uuid.UUID("bdb62bb2-1fe5-4d14-92fd-60a041355aea")
resolve_user_and_session_mock.return_value = (user_id, session_id)

user_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
# Mock run_in_threadpool to simulate DB add_user_bookmark_sync
document_id = "ffffffff-ffff-ffff-ffff-ffffffffffff"
run_in_threadpool_mock.return_value = document_id

response = client.post(
f"{settings.API_V1_STR}/user/:user_id/bookmarks/:document_id",
params={"user_id": user_id, "document_id": document_id},
f"{settings.API_V1_STR}/user/bookmarks/:document_id",
params={"document_id": document_id},
headers={"X-API-Key": "test"},
cookies={"x-session-id": str(session_id)},
)
Comment on lines 258 to 263
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test calls /user/bookmarks/:document_id (literal :document_id) and passes document_id via query params. If the intent is to have a path parameter, the route should be /user/bookmarks/{document_id} and the test should call /user/bookmarks/<uuid>. If the intent is a query parameter, remove :document_id from the URL to avoid confusion.

Copilot uses AI. Check for mistakes.
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(), {"added": document_id})
session.add.assert_called_once()
session.commit.assert_called_once()
resolve_user_and_session_mock.assert_called_once()
run_in_threadpool_mock.assert_called_once()
Loading
Loading