diff --git a/cms/djangoapps/modulestore_migrator/tasks.py b/cms/djangoapps/modulestore_migrator/tasks.py index c65c8af0c59c..c32f646bb145 100644 --- a/cms/djangoapps/modulestore_migrator/tasks.py +++ b/cms/djangoapps/modulestore_migrator/tasks.py @@ -7,7 +7,7 @@ import os import typing as t from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import UTC, datetime, timezone from enum import Enum from gettext import ngettext @@ -341,6 +341,7 @@ def _import_structure( openedx_content equivalent. """ migration = source_data.migration + migration_start_time = datetime.now(UTC) migration_context = _MigrationContext( used_component_keys=set( LibraryUsageLocatorV2(target_library.key, block_type, block_id) # type: ignore[abstract] @@ -367,14 +368,35 @@ def _import_structure( repeat_handling_strategy=RepeatHandlingStrategy(migration.repeat_handling_strategy), preserve_url_slugs=migration.preserve_url_slugs, created_by=status.user_id, - created_at=datetime.now(timezone.utc), # noqa: UP017 + created_at=migration_start_time, ) - with content_api.bulk_draft_changes_for(migration.target.id) as change_log: + with content_api.bulk_draft_changes_for( + learning_package_id=migration.target.id, + changed_by=migration_context.created_by, + changed_at=migration_start_time, + ) as change_log: root_migrated_node = _migrate_node( context=migration_context, source_node=root_node, ) - change_log.save() + # Publishing is not allowed inside bulk_draft_changes_for(), so publish + # everything that was modified now that the context has exited. We use the + # change log to identify which drafts to publish. If the context produced + # no records, it deletes the change log on exit (clearing its PK), in which + # case there's nothing to publish and we return None so callers don't try + # to associate the deleted change log with the migration. 
+ if not change_log.pk: + return None, root_migrated_node + if change_log.records.exists(): + drafts_to_publish = content_api.get_all_drafts(migration.target.id).filter( + entity_id__in=change_log.records.values_list("entity_id", flat=True), + ) + content_api.publish_from_drafts( + migration.target.id, + draft_qset=drafts_to_publish, + published_by=migration_context.created_by, + # published_at will be later than 'migration_start_time' as _migrate_node() may have taken quite a while. + ) return change_log, root_migrated_node @@ -408,7 +430,7 @@ def _populate_collection(user_id: int, migration: models.ModulestoreMigration) - ) if block_target_pks: content_api.add_to_collection( - learning_package_id=migration.target.pk, + learning_package_id=migration.target.id, collection_code=migration.target_collection.collection_code, entities_qset=PublishableEntity.objects.filter(id__in=block_target_pks), created_by=user_id, @@ -421,6 +443,7 @@ def _create_collection( library_key: LibraryLocatorV2, title: str, course_name: str | None = None, + created_by: int | None = None, ) -> Collection: """ Creates a collection in the given library @@ -446,6 +469,7 @@ def _create_collection( collection_key=modified_key, title=f"{title}{f'_{attempt}' if attempt > 0 else ''}", description=description, + created_by=created_by, ) except libraries_api.LibraryCollectionAlreadyExists: attempt += 1 @@ -705,6 +729,7 @@ def bulk_migrate_from_modulestore( library_key=target_library_locator, title=legacy_root_list[i].display_name, course_name=legacy_root_list[i].display_name if source_data.source.key.is_course else None, + created_by=user_id, ) ) _populate_collection(user_id, migration) @@ -894,11 +919,7 @@ def _migrate_container( created_by=context.created_by, ).publishable_entity_version - # Publish the container - libraries_api.publish_container_changes( - container.container_key, - context.created_by, - ) + # Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure. 
context.used_container_slugs.add(container.container_key.container_id) return container_publishable_entity_version, None @@ -969,11 +990,10 @@ def _migrate_component( # Create the new component version for it component_version = libraries_api.set_library_block_olx( - target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids, + target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids, created_by=context.created_by, ) - # Publish the component - libraries_api.publish_component_changes(target_key, context.created_by) + # Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure. context.used_component_keys.add(target_key) return component_version.publishable_entity_version, None diff --git a/cms/djangoapps/modulestore_migrator/tests/test_tasks.py b/cms/djangoapps/modulestore_migrator/tests/test_tasks.py index ae4ad1548937..2b583265feb5 100644 --- a/cms/djangoapps/modulestore_migrator/tests/test_tasks.py +++ b/cms/djangoapps/modulestore_migrator/tests/test_tasks.py @@ -370,8 +370,9 @@ def test_migrate_component_success(self): "problem", result.componentversion.component.component_type.name ) - # The component is published - self.assertFalse(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009 + # The component is left as a draft; publishing is the caller's responsibility + # (handled in _import_structure after bulk_draft_changes_for exits). 
+ self.assertTrue(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009 def test_migrate_component_failure(self): """ @@ -802,8 +803,9 @@ def test_migrate_container_different_container_types(self): container_version = result.containerversion self.assertEqual(container_version.title, f"Test {block_type.title()}") # noqa: PT009 - # The container is published - self.assertFalse(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long + # The container is left as a draft; publishing is the caller's + # responsibility (handled in _import_structure after bulk_draft_changes_for exits). + self.assertTrue(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long def test_migrate_container_same_title(self): """ diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 4623b814d0a0..171932dc32d1 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -520,7 +520,7 @@ def searchable_doc_containers(object_id: OpaqueKey, container_type: str) -> dict else: log.warning(f"Unexpected key type for {object_id}") - except ObjectDoesNotExist: + except (ObjectDoesNotExist, lib_api.ContentLibraryBlockNotFound): log.warning(f"No library item found for {object_id}") if not containers: @@ -558,6 +558,9 @@ def searchable_doc_for_collection( if collection: assert collection.collection_code == collection_key.collection_id + # Collections themselves are not publishable entities, so don't have a "draft" or "published" version, but the + # entities they contain are publishable, so the number of entities in the collection may be different between + # draft and published views, if some draft entities are unpublished or are published but the draft is deleted. 
draft_num_children = content_api.filter_publishable_entities( collection.entities, has_draft=True, diff --git a/openedx/core/djangoapps/content_libraries/api/blocks.py b/openedx/core/djangoapps/content_libraries/api/blocks.py index e23322d3b29b..077b8e6a94c5 100644 --- a/openedx/core/djangoapps/content_libraries/api/blocks.py +++ b/openedx/core/djangoapps/content_libraries/api/blocks.py @@ -417,6 +417,9 @@ def set_library_block_olx( usage_key: LibraryUsageLocatorV2, new_olx_str: str, paths_to_media: dict | None = None, + # The following arg can be removed after https://github.com/openedx/openedx-core/pull/573 lands + # then we can presumably just get the name from the bulk_draft_changes_for context + created_by: int | None = None, ) -> ComponentVersion: """ Replace the OLX source of the given XBlock. @@ -488,6 +491,7 @@ def set_library_block_olx( 'block.xml': new_olx_media.pk, }, created=now, + created_by=created_by, ) return new_component_version @@ -897,6 +901,8 @@ def delete_library_block( try: component = get_component_from_usage_key(usage_key) + if component.versioning.draft is None: + raise Component.DoesNotExist("Component draft version was already deleted.") except Component.DoesNotExist: # There may be cases where entries are created in the # search index, but the component is not created diff --git a/openedx/core/djangoapps/content_libraries/tasks.py b/openedx/core/djangoapps/content_libraries/tasks.py index 75460f78a73f..daea33a8ced6 100644 --- a/openedx/core/djangoapps/content_libraries/tasks.py +++ b/openedx/core/djangoapps/content_libraries/tasks.py @@ -51,6 +51,7 @@ from openedx_events.content_authoring.data import ( ContentObjectChangedData, LibraryBlockData, + LibraryCollectionData, LibraryContainerData, ) from openedx_events.content_authoring.signals import ( @@ -59,11 +60,13 @@ LIBRARY_BLOCK_DELETED, LIBRARY_BLOCK_PUBLISHED, LIBRARY_BLOCK_UPDATED, + LIBRARY_COLLECTION_UPDATED, LIBRARY_CONTAINER_CREATED, LIBRARY_CONTAINER_DELETED, 
LIBRARY_CONTAINER_PUBLISHED, LIBRARY_CONTAINER_UPDATED, ) +from openedx_tagging import api as tagging_api from user_tasks.models import UserTaskArtifact from user_tasks.tasks import UserTask, UserTaskStatus from xblock.fields import Scope @@ -118,10 +121,11 @@ def send_change_events_for_modified_entities( for entity in entities: change = changes_by_entity_id[entity.id] + entity_opaque_key: LibraryUsageLocatorV2 | LibraryContainerLocator if hasattr(entity, "component"): # This is a library XBlock (component) - block_key = api.library_component_usage_key(library.library_key, entity.component) - event_data = LibraryBlockData(library_key=library.library_key, usage_key=block_key) + entity_opaque_key = api.library_component_usage_key(library.library_key, entity.component) + event_data = LibraryBlockData(library_key=library.library_key, usage_key=entity_opaque_key) if change.old_version is None and change.new_version: # .. event_implemented_name: LIBRARY_BLOCK_CREATED # .. event_type: org.openedx.content_authoring.library_block.created.v1 @@ -137,8 +141,8 @@ def send_change_events_for_modified_entities( LIBRARY_BLOCK_UPDATED.send_event(library_block=event_data) elif hasattr(entity, "container"): - container_key = api.library_container_locator(library.library_key, entity.container) - event_data = LibraryContainerData(container_key=container_key) + entity_opaque_key = api.library_container_locator(library.library_key, entity.container) + event_data = LibraryContainerData(container_key=entity_opaque_key) if change.old_version is None and change.new_version: # .. event_implemented_name: LIBRARY_CONTAINER_CREATED # .. event_type: org.openedx.content_authoring.content_library.container.created.v1 @@ -163,7 +167,7 @@ def send_change_events_for_modified_entities( # If entities were added/removed from this container, we need to notify things like the search index # that the list of parent containers for each entity has changed. 
check_container_content_changes.delay( - container_key_str=str(container_key), + container_key_str=str(entity_opaque_key), old_version_id=change.old_version_id, new_version_id=change.new_version_id, ) @@ -171,6 +175,57 @@ def send_change_events_for_modified_entities( log.error("Unknown publishable entity type: %s", entity) continue + if change.restored: + # This block/container was previously soft-deleted and is now un-deleted. It may have tags or collections. + # It would be best to expand the LIBRARY_BLOCK_CREATED event to include the "restored" flag, but in + # the interests of minimizing breaking event changes for now we'll just emit a + # CONTENT_OBJECT_ASSOCIATIONS_CHANGED event to ensure relevant search index records get updated. + association_changes: list[str] = [] + if content_api.get_entity_collections(learning_package_id, entity_ref=entity.entity_ref).exists(): + association_changes.append("collections") # This entity is part of at least one collection + if tagging_api.get_object_tags(str(entity_opaque_key)).exists(): + association_changes.append("tags") # This entity has tags + if association_changes: + # .. event_implemented_name: CONTENT_OBJECT_ASSOCIATIONS_CHANGED + # .. event_type: org.openedx.content_authoring.content.object.associations.changed.v1 + CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event( + content_object=ContentObjectChangedData( + object_id=str(entity_opaque_key), + changes=association_changes, + ), + ) + # Notifying parent containers that a child has been restored is not necessary here - they'll already be + # included in 'change_list' [as side effects]. + + # When entities are deleted or un-deleted (as drafts), update any associated collections, so their "# of draft + # entities in collection" count is correct. We only care about deleted or un-deleted, not newly created drafts, + # because it's currently impossible for a newly-created draft to be part of a collection. 
+ deleted_or_undeleted_entity_ids = [ + r.entity_id for r in changes if r.new_version is None or (r.old_version is None and r.restored) + ] + if deleted_or_undeleted_entity_ids: + emit_collections_updated(library, entity_ids=deleted_or_undeleted_entity_ids) + + +def emit_collections_updated(library: ContentLibrary, entity_ids: list[PublishableEntity.ID]) -> None: + """ + Helper function to notify affected collections after an entity is deleted/un-deleted/published/un-published. + + Used by `send_change_events_for_modified_entities()` and `send_events_after_publish()` + """ + # Check if any collections are affected: + affected_collections = ( + content_api.get_collections(library.learning_package_id, enabled=True).filter(entities__id__in=entity_ids) + ) + for collection in affected_collections: + collection_key = api.library_collection_locator( + library_key=library.library_key, + collection_key=collection.collection_code, + ) + # .. event_implemented_name: LIBRARY_COLLECTION_UPDATED + # .. event_type: org.openedx.content_authoring.content_library.collection.updated.v1 + LIBRARY_COLLECTION_UPDATED.send_event(library_collection=LibraryCollectionData(collection_key=collection_key)) + @shared_task(base=LoggedTask) @set_code_owner_attribute @@ -252,6 +307,8 @@ def send_collections_changed_events( """ Sends a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for each modified library entity in the given list, because their associated collections have changed. + This is dispatched in response to a COLLECTION_CHANGED event, usually + because entities have been added to or removed from a collection. 
⏳ This task is designed to be run asynchronously so it can handle many entities, but you can also call it synchronously if you are only @@ -263,6 +320,8 @@ def send_collections_changed_events( entities = ( content_api.get_publishable_entities(learning_package_id) .filter(id__in=publishable_entity_ids) + # Ignore deleted items (both draft & published are deleted) that are still associated with the collection: + .exclude(draft__version=None, published__version=None) .select_related("component", "container") ) @@ -302,6 +361,7 @@ def send_events_after_publish(publish_log_id: int, library_key_str: str) -> None """ publish_log = PublishLog.objects.get(id=publish_log_id) library_key = LibraryLocatorV2.from_string(library_key_str) + library = ContentLibrary.objects.get_by_key(library_key) affected_entities = publish_log.records.select_related( "entity", "entity__container", "entity__container__container_type", "entity__component", ).all() @@ -333,6 +393,14 @@ def send_events_after_publish(publish_log_id: int, library_key_str: str) -> None "was modified during publish operation but is of unknown type." ) + # When entities get newly published or their published version is deleted, update the "# of published entities in + # collection" count of any associated collections. 
+    newly_published_or_unpublished_entity_ids = [ +        r.entity_id for r in affected_entities if r.new_version is None or r.old_version is None +    ] +    if newly_published_or_unpublished_entity_ids: +        emit_collections_updated(library, entity_ids=newly_published_or_unpublished_entity_ids) +   def _filter_child(store, usage_key, capa_type): """ @@ -726,7 +794,11 @@ def dispatch_and_wait(task_fn: Task, wait_for_full_completion: bool = False, **k result: AsyncResult = task_fn.delay(**kwargs) # Try waiting a bit for the task to finish before we complete the request: try: - result.get(timeout=10) + # We use `disable_sync_subtasks=False` because some of our tasks emit + # events whose handlers then spawn additional tasks which are sometimes + # synchronous. Ideal usage of celery would be to "chain" the tasks + # instead of spawning subtasks, but that would require a major refactor. + result.get(timeout=10, disable_sync_subtasks=False) except CeleryTimeout: pass # This is fine! The search index is still being updated, and/or other diff --git a/openedx/core/djangoapps/content_libraries/tests/test_api.py b/openedx/core/djangoapps/content_libraries/tests/test_api.py index 99f2568d6114..1060ec42713e 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_api.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_api.py @@ -418,24 +418,24 @@ def test_delete_library_container(self) -> None: self.assertDictContainsEntries( event_receiver.call_args_list[0].kwargs, { - "signal": LIBRARY_COLLECTION_UPDATED, + "signal": LIBRARY_CONTAINER_UPDATED, "sender": None, - "library_collection": LibraryCollectionData( - collection_key=api.library_collection_locator( - self.lib1.library_key, - collection_key=self.col1.collection_code, - ), + "library_container": LibraryContainerData( + container_key=self.subsection1.container_key, + background=False, ), }, ) self.assertDictContainsEntries( event_receiver.call_args_list[1].kwargs, { - "signal": LIBRARY_CONTAINER_UPDATED, + "signal":
LIBRARY_COLLECTION_UPDATED, "sender": None, - "library_container": LibraryContainerData( - container_key=self.subsection1.container_key, - background=False, + "library_collection": LibraryCollectionData( + collection_key=api.library_collection_locator( + self.lib1.library_key, + collection_key=self.col1.collection_code, + ), ), }, ) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index befed51f0a09..429b143e97d9 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -840,7 +840,7 @@ openedx-calc==5.0.0 # via # -r requirements/edx/kernel.in # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/fix-collection-events-2 # via # -c requirements/constraints.txt # -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 506302e8dbf5..bfc1c6038b90 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -1383,7 +1383,7 @@ openedx-calc==5.0.0 # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/fix-collection-events-2 # via # -c requirements/constraints.txt # -r requirements/edx/doc.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 54a07685b1f1..c1e8ebab98a4 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -1018,7 +1018,7 @@ openedx-calc==5.0.0 # via # -r requirements/edx/base.txt # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/fix-collection-events-2 # via # -c requirements/constraints.txt # -r requirements/edx/base.txt diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 5fde667fbb7b..98c4092f4d19 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -1058,7 +1058,7 @@ openedx-calc==5.0.0 # via # -r 
requirements/edx/base.txt # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/fix-collection-events-2 # via # -c requirements/constraints.txt # -r requirements/edx/base.txt