Skip to content
Draft
3 changes: 3 additions & 0 deletions cms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,9 @@ def make_lms_template_path(settings):

'openedx_events',

# Core models to represent courses
"openedx_catalog",

# Core apps that power libraries
"openedx_content",
*openedx_content_backcompat_apps_to_install(),
Expand Down
3 changes: 3 additions & 0 deletions lms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,9 @@

'openedx_events',

# Core models to represent courses
"openedx_catalog",

# Core apps that power libraries
"openedx_content",
*openedx_content_backcompat_apps_to_install(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""
Data migration to populate the new CourseRun and CatalogCourse models.
"""

# Generated by Django 5.2.11 on 2026-02-13 21:47
import logging

from django.conf import settings
from django.db import migrations
from organizations.api import ensure_organization, exceptions as org_exceptions

# Module-level logger; the backfill below uses it to warn about data inconsistencies.
log = logging.getLogger(__name__)

# Maps course language codes (looked up after lower-casing and replacing "_" with "-")
# to the normalized code that the backfill stores on CatalogCourse. Codes that are not
# listed here are left unchanged by the lookup.
# See https://github.com/openedx/openedx-platform/issues/38036
NORMALIZE_LANGUAGE_CODES = {
    "zh-hans": "zh-cn",
    "zh-hant": "zh-hk",
    "ca@valencia": "ca-es-valencia",
}


def backfill_openedx_catalog(apps, schema_editor):
    """
    Backfill CatalogCourse and CourseRun rows for every course in the split modulestore index.

    For each course index entry this ensures the Organization exists, ensures a CatalogCourse
    exists for the (org, course code) pair, ensures a CourseRun exists for the course key, and
    then back-dates the "created" timestamps using the cached CourseOverview when one exists.

    Args:
        apps: The app registry handed to RunPython; used to look up the models.
        schema_editor: Unused.

    Raises:
        ValueError: If an organization is missing and cannot be auto-created, or if a course
            code differs (e.g. in capitalization) from the matching CatalogCourse's course code.
    """
    # CourseOverview is only a cache derived from modulestore, so the modulestore course index is
    # what we iterate to find "all courses on the system"; CourseOverview just supplies details.
    CourseIndex = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex")
    CourseOverview = apps.get_model("course_overviews", "CourseOverview")
    CatalogCourse = apps.get_model("openedx_catalog", "CatalogCourse")
    CourseRun = apps.get_model("openedx_catalog", "CourseRun")

    # Primary keys of the CatalogCourse rows that this migration run itself created.
    pks_created_here: set[int] = set()
    index_entries = CourseIndex.objects.filter(base_store="mongodb", library_version="").order_by("course_id")
    for course_run in index_entries:
        course_key = course_run.course_id
        org_code: str = course_key.org
        course_code: str = course_key.course
        run_code: str = course_key.run

        # Step 1: make sure the Organization exists.
        try:
            org_data = ensure_organization(org_code)
        except org_exceptions.InvalidOrganizationException as exc:
            # This only happens if the org is used by a modulestore course but is absent from the
            # Organizations table AND auto-creation is disabled (it is enabled by default).
            # How to resolve that is the operator's call, so stop with an explanation.
            raise ValueError(
                f'The organization short code "{org_code}" exists in modulestore ({course_run.course_id}) but '
                "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by "
                "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. "
                "You can set active=False to prevent this Organization from being used other than for historical data. "
            ) from exc
        org_short_name = org_data["short_name"]
        if org_short_name != org_code:
            # On most installations the 'short_name' column is case insensitive, so a lookup can
            # return an organization whose short name differs from the course's org code.
            log.warning(
                'The course with ID "%s" does not match its Organization.short_name "%s"',
                course_key,
                org_short_name,
            )

        # Step 2: look up the cached CourseOverview, which may not have been generated yet.
        try:
            course_overview = CourseOverview.objects.get(id=course_key)
        except CourseOverview.DoesNotExist:
            course_overview = None
        overview_name = course_overview.display_name if course_overview else None
        display_name: str = overview_name or course_code

        # Step 3: work out the course language.
        # Studio's language choices generally came from the ALL_LANGUAGES setting: mostly two-letter
        # codes without a locale, plus "zh_HANS" (Mandarin) and "zh_HANT" (Cantonese). Those are
        # normalized to "zh-cn" / "zh-hk" to be consistent with the platform UI languages / Transifex.
        # The "old" form remains reachable through the CatalogCourse.language_short getter/setter for
        # backwards compatibility. See https://github.com/openedx/openedx-platform/issues/38036
        language = settings.LANGUAGE_CODE
        raw_language = course_overview.language if course_overview else None
        if raw_language:
            # Lower-case and use hyphens for consistency (`en-us`, not `en_us`), then normalize.
            hyphenated = raw_language.lower().replace("_", "-")
            language = NORMALIZE_LANGUAGE_CODES.get(hyphenated, hyphenated)
            if len(language) > 2 and language[2] != "-":
                # Doesn't look like a valid code, so fall back to the site default.
                log.warning(
                    'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.',
                    course_key,
                    language,
                    settings.LANGUAGE_CODE,
                )
                language = settings.LANGUAGE_CODE

        # Step 4: make sure the CatalogCourse exists.
        cc, is_new_catalog_course = CatalogCourse.objects.get_or_create(
            org_id=org_data["id"],
            course_code=course_code,
            defaults={
                "display_name": display_name,
                "language": language,
            },
        )
        if is_new_catalog_course:
            pks_created_here.add(cc.pk)
        elif cc.pk in pks_created_here:
            # An earlier run in this same migration created this CatalogCourse. If this run's
            # name disagrees with it, fall back to the course code as the shared name.
            names_disagree = (
                overview_name
                and overview_name != cc.display_name
                and cc.display_name != course_code
            )
            if names_disagree:
                cc.display_name = course_code
                cc.save(update_fields=["display_name"])

        if cc.course_code != course_code:
            raise ValueError(
                f"The course {course_run.course_id} exists in modulestore with a different capitalization of its "
                f'course code compared to other instances of the same run ("{course_code}" vs "{cc.course_code}"). '
                "This really should not happen. To fix it, delete the inconsistent course runs (!). "
            )

        # Step 5: make sure the CourseRun exists.
        run_obj, is_new_run = CourseRun.objects.get_or_create(
            catalog_course=cc,
            run=run_code,
            course_id=course_key,
            defaults={"display_name": display_name},
        )

        # Step 6: fix up the "created" timestamps. The field has auto_now_add=True, so the only way
        # to set it is a queryset update(). The CourseOverview's "created" date is the best value
        # available; without a CourseOverview the default (now) is kept.
        if not course_overview:
            continue
        overview_created = course_overview.created
        if overview_created < cc.created and cc.pk in pks_created_here:
            # Keep the 'created' date of the oldest course run processed for this catalog course.
            CatalogCourse.objects.filter(pk=cc.pk).update(created=overview_created)
        if is_new_run:
            CourseRun.objects.filter(pk=run_obj.pk).update(created=overview_created)


class Migration(migrations.Migration):
    """
    Data migration that runs backfill_openedx_catalog to populate CatalogCourse and CourseRun.
    """

    # backfill_openedx_catalog looks up models from each of these three apps, so the listed
    # migrations must be applied before this one runs.
    dependencies = [
        ("openedx_catalog", "0001_initial"),
        ("course_overviews", "0029_alter_historicalcourseoverview_options"),
        ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
    ]

    # The reverse operation is a no-op, so reversing this migration does not remove backfilled rows.
    operations = [
        migrations.RunPython(backfill_openedx_catalog, reverse_code=migrations.RunPython.noop),
    ]
60 changes: 59 additions & 1 deletion openedx/core/djangoapps/content/course_overviews/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
Signal handler for invalidating cached course overviews
"""


import logging

from django.db import transaction
from django.db.models.signals import post_save
from django.dispatch import Signal
from django.dispatch.dispatcher import receiver

from openedx_catalog import api as catalog_api
from openedx_catalog.models_api import CourseRun
from openedx.core.djangoapps.signals.signals import COURSE_CERT_DATE_CHANGE
from xmodule.data import CertificatesDisplayBehaviors
from xmodule.modulestore.django import SignalHandler
Expand All @@ -33,6 +34,8 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable
"""
Catches the signal that a course has been published in Studio and updates the corresponding CourseOverview cache
entry.

Also sync course data to the openedx_catalog CourseRun model.
"""
try:
previous_course_overview = CourseOverview.objects.get(id=course_key)
Expand All @@ -41,6 +44,51 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable
updated_course_overview = CourseOverview.load_from_module_store(course_key)
_check_for_course_changes(previous_course_overview, updated_course_overview)

# Currently, SplitModulestoreCourseIndex is the ultimate source of truth for
# which courses exist. When a course is published, we sync that data to
# CourseOverview, and from CourseOverview to CourseRun.

# In the future, CourseRun will be the "source of truth" and each CourseRun
# may optionally point to content and get synced to CourseOverview.

# Ensure a CourseRun exists for this course
try:
course_run = catalog_api.get_course_run(course_key)
except CourseRun.DoesNotExist:
# Presumably this is a newly-created course. Create the CourseRun.
course_run = catalog_api.create_course_run_for_modulestore_course_with(
course_id=course_key,
display_name=updated_course_overview.display_name,
language_short=updated_course_overview.language,
)

# Keep the CourseRun up to date as the course is edited:
if updated_course_overview.display_name != course_run.display_name:
catalog_api.sync_course_run_details(course_key, display_name=updated_course_overview.display_name)
# If this course is the only run in the CatalogCourse, should we update the display_name of
# the CatalogCourse to match the run's new name? Currently the only way to edit the name of
# a CatalogCourse is via the Django admin. But it's also not used anywhere yet.

if (
updated_course_overview.language
and updated_course_overview.language != course_run.catalog_course.language_short
):
if course_run.catalog_course.runs.count() == 1:
# This is the only run in this CatalogCourse. Update the language of the CatalogCourse
catalog_api.update_catalog_course(
course_run.catalog_course,
language_short=updated_course_overview.language,
)
else:
LOG.warning(
'Course run "%s" language "%s" does not match its catalog course language, "%s"',
str(course_key),
updated_course_overview.language,
course_run.catalog_course.language_short,
)

# In the future, this will also sync schedule and other metadata to the CourseRun's related models


@receiver(SignalHandler.course_deleted)
def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=unused-argument
Expand All @@ -56,6 +104,16 @@ def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=
sender=None,
courserun_key=courserun_key,
)
# Delete the openedx_catalog CourseRun to keep it in sync:
try:
course_run_obj = catalog_api.get_course_run(course_key)
except CourseRun.DoesNotExist:
pass
else:
catalog_course = course_run_obj.catalog_course
catalog_api.delete_course_run(course_key)
if catalog_course.runs.count() == 0:
catalog_api.delete_catalog_course(catalog_course)


@receiver(post_save, sender=CourseOverview)
Expand Down
Loading
Loading