@property
def can_backfill_ids(self):
    """Tell whether this integration's manifest supports ID backfilling.

    True when the manifest's ``exists`` block declares a non-empty
    ``store_data`` mapping, meaning values can be extracted from the lookup
    response and written into existing users' ``extra_fields``.
    """
    # `.get("exists", {})` only falls back when the key is absent; an explicit
    # null (or any non-dict value) would make the chained `.get` blow up.
    exists_block = self.manifest.get("exists") or {}
    if not isinstance(exists_block, dict):
        return False
    return bool(exists_block.get("store_data"))
+ store_data = self.manifest["exists"].get("store_data", {}) + if user_exists and store_data: + try: + json_response = response.json() + except (ValueError, AttributeError): + json_response = {} + for new_hire_prop, notation in store_data.items(): + try: + value = get_value_from_notation( + self._replace_vars(notation), json_response + ) + except KeyError: + continue + if value is None: + continue + new_hire.extra_fields[new_hire_prop] = value + new_hire.save() + if save_result: IntegrationUser.objects.update_or_create( integration=self, user=new_hire, defaults={"revoked": not user_exists} diff --git a/back/admin/integrations/serializers.py b/back/admin/integrations/serializers.py index 12130b926..89e0d0d84 100644 --- a/back/admin/integrations/serializers.py +++ b/back/admin/integrations/serializers.py @@ -51,6 +51,7 @@ class ManifestPollingSerializer(ValidateMixin, serializers.Serializer): class ManifestExistSerializer(ValidateMixin, serializers.Serializer): url = serializers.CharField() expected = serializers.CharField() + store_data = serializers.DictField(child=serializers.CharField(), default=dict) status_code = serializers.ListField( child=serializers.IntegerField(), required=False ) diff --git a/back/admin/integrations/tasks.py b/back/admin/integrations/tasks.py index bcb9776ba..4de43fa32 100644 --- a/back/admin/integrations/tasks.py +++ b/back/admin/integrations/tasks.py @@ -1,8 +1,12 @@ +import logging + from django.contrib.auth import get_user_model from admin.integrations.models import Integration from admin.integrations.sync_userinfo import SyncUsers +logger = logging.getLogger(__name__) + def retry_integration(new_hire_id, integration_id, params): integration = Integration.objects.get(id=integration_id) @@ -15,3 +19,50 @@ def sync_user_info(integration_id): # users or we will add new users. This is done in the background. 
def backfill_integration_ids(integration_id):
    """Backfill users' ``extra_fields`` via the integration's ``exists`` lookup.

    Calls ``integration.user_exists(user, save_result=False)`` for every user
    with a non-empty email that is still missing at least one of the keys
    declared under the manifest's ``exists.store_data``. The lookup itself is
    what writes the declared values onto the user; this task only drives it
    and keeps count. Used to populate IDs for users who were provisioned in
    the external system before this integration existed.

    Returns a dict with the ``matched``, ``skipped``, ``not_found`` and
    ``errored`` counts.
    """
    integration = Integration.objects.get(id=integration_id)
    store_keys = list(
        integration.manifest.get("exists", {}).get("store_data", {})
    )
    matched = skipped = not_found = errored = 0

    if store_keys:
        users = get_user_model().objects.exclude(email="").order_by("id")
    else:
        # Nothing can be stored, so a lookup per user would only generate
        # pointless traffic against the external system.
        logger.warning(
            "Backfill skipped for integration %s: no store_data declared",
            integration_id,
        )
        users = ()

    for user in users:
        # skip users who already have all backfill keys set; tolerate a
        # null extra_fields instead of aborting the whole run
        present = user.extra_fields or {}
        if all(key in present for key in store_keys):
            skipped += 1
            continue
        try:
            result = integration.user_exists(user, save_result=False)
        except Exception as e:
            # Task boundary: one failing user must not stop the others, but
            # keep the traceback so the failure can be diagnosed.
            logger.warning(
                "Backfill error for integration %s, user %s: %s",
                integration_id, user.email, e,
                exc_info=True,
            )
            errored += 1
            continue
        if result is True:
            matched += 1
        elif result is False:
            not_found += 1
        else:
            # anything else (e.g. None) means the lookup gave no usable answer
            errored += 1

    logger.info(
        "Backfill complete for integration %s: "
        "%s matched, %s skipped, %s not found, %s errored",
        integration_id, matched, skipped, not_found, errored,
    )
    return {
        "matched": matched,
        "skipped": skipped,
        "not_found": not_found,
        "errored": errored,
    }
c2e4daf68..89a661859 100644 --- a/back/admin/integrations/utils.py +++ b/back/admin/integrations/utils.py @@ -1,27 +1,72 @@ +def _tokenize_notation(notation): + # split on '.' but keep [...] groups intact, so values inside a filter + # expression (which may themselves contain '.', e.g. emails) aren't split + tokens = [] + buf = "" + depth = 0 + for ch in notation: + if ch == "[": + depth += 1 + buf += ch + elif ch == "]": + depth -= 1 + buf += ch + elif ch == "." and depth == 0: + if buf: + tokens.append(buf) + buf = "" + else: + buf += ch + if buf: + tokens.append(buf) + return tokens + + def get_value_from_notation(notation, value): # if we don't need to go into props, then just return the value if notation == "": return value - notations = notation.split(".") - for notation in notations: + for token in _tokenize_notation(notation): + # filter form: optional_key[field=expected] - pick first list entry + # whose `field` equals `expected`. Useful when the upstream API returns + # an unfiltered list (e.g. Bitwarden /public/members). 
+ if "[" in token and token.endswith("]"): + list_key, _, filter_expr = token.partition("[") + filter_expr = filter_expr[:-1] + + if list_key: + try: + value = value[list_key] + except (KeyError, TypeError): + raise KeyError + + if "=" not in filter_expr or not isinstance(value, list): + raise KeyError + + field, _, expected = filter_expr.partition("=") + for item in value: + if isinstance(item, dict) and str(item.get(field, "")) == expected: + value = item + break + else: + raise KeyError + continue + try: - value = value[notation] + value = value[token] except TypeError: - # check if array if not isinstance(value, list): raise KeyError try: - index = int(notation) + index = int(token) except (TypeError, ValueError): - # keep errors consistent, we are only expecting a KeyError raise KeyError try: value = value[index] except (TypeError, ValueError, IndexError): - # keep errors consistent, we are only expecting a KeyError raise KeyError return value diff --git a/back/admin/integrations/views.py b/back/admin/integrations/views.py index 832bfc033..bfe625fac 100644 --- a/back/admin/integrations/views.py +++ b/back/admin/integrations/views.py @@ -15,6 +15,7 @@ from django.views.generic.detail import DetailView from django.views.generic.edit import CreateView, DeleteView, UpdateView from django.views.generic.list import ListView +from django_q.tasks import async_task from users.mixins import AdminOrManagerPermMixin, AdminPermMixin @@ -244,3 +245,25 @@ def get_context_data(self, **kwargs): } context["subtitle"] = _("integrations") return context + + +class IntegrationBackfillIDsView(AdminPermMixin, View): + def post(self, request, pk): + integration = get_object_or_404(Integration, pk=pk) + if not integration.can_backfill_ids: + messages.error( + request, + _("This integration has no store_data declared on its exists block."), + ) + return redirect("settings:integrations") + async_task( + "admin.integrations.tasks.backfill_integration_ids", + integration.id, + 
task_name=f"Backfill IDs: {integration.name}", + ) + messages.success( + request, + _("Backfill started for %(name)s. Users' extra fields will populate " + "as the lookup runs in the background.") % {"name": integration.name}, + ) + return redirect("settings:integrations") diff --git a/back/admin/settings/templates/settings_integrations.html b/back/admin/settings/templates/settings_integrations.html index 17174d36c..bf5bf8370 100644 --- a/back/admin/settings/templates/settings_integrations.html +++ b/back/admin/settings/templates/settings_integrations.html @@ -65,6 +65,15 @@ {% translate "Update credentials" %} {% endif %} + {% if integration.can_backfill_ids %} +
+ {% csrf_token %} + +
+ {% endif %} {% translate "Update manifest" %}