Skip to content

Commit

Permalink
data_import: Fix bot email address de-duplication.
Browse files Browse the repository at this point in the history
4815f6e tried to de-duplicate bot
email addresses, but instead caused duplicates to crash:

```
Traceback (most recent call last):
  File "./manage.py", line 157, in <module>
    execute_from_command_line(sys.argv)
  File "./manage.py", line 122, in execute_from_command_line
    utility.execute()
  File "/srv/zulip-venv-cache/56ac6adf406011a100282dd526d03537be84d23e/zulip-py3-venv/lib/python3.8/site-packages/django/core/management/__init__.py", line 413, in execute
    self.fetch_command(subcommand).run_from_argv(self.argv)
  File "/srv/zulip-venv-cache/56ac6adf406011a100282dd526d03537be84d23e/zulip-py3-venv/lib/python3.8/site-packages/django/core/management/base.py", line 354, in run_from_argv
    self.execute(*args, **cmd_options)
  File "/srv/zulip-venv-cache/56ac6adf406011a100282dd526d03537be84d23e/zulip-py3-venv/lib/python3.8/site-packages/django/core/management/base.py", line 398, in execute
    output = self.handle(*args, **options)
  File "/home/zulip/deployments/2022-03-16-22-25-42/zerver/management/commands/convert_slack_data.py", line 59, in handle
    do_convert_data(path, output_dir, token, threads=num_threads)
  File "/home/zulip/deployments/2022-03-16-22-25-42/zerver/data_import/slack.py", line 1320, in do_convert_data
    ) = slack_workspace_to_realm(
  File "/home/zulip/deployments/2022-03-16-22-25-42/zerver/data_import/slack.py", line 141, in slack_workspace_to_realm
    ) = users_to_zerver_userprofile(slack_data_dir, user_list, realm_id, int(NOW), domain_name)
  File "/home/zulip/deployments/2022-03-16-22-25-42/zerver/data_import/slack.py", line 248, in users_to_zerver_userprofile
    email = get_user_email(user, domain_name)
  File "/home/zulip/deployments/2022-03-16-22-25-42/zerver/data_import/slack.py", line 406, in get_user_email
    return SlackBotEmail.get_email(user["profile"], domain_name)
  File "/home/zulip/deployments/2022-03-16-22-25-42/zerver/data_import/slack.py", line 85, in get_email
    email_prefix += cls.duplicate_email_count[email]
TypeError: can only concatenate str (not "int") to str
```

Fix the stringification, make it case-insensitive, append with a dash
for readability, and add tests for all of the above.
  • Loading branch information
alexmv authored and timabbott committed Mar 31, 2022
1 parent 65e19c4 commit 2e50ead
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 4 deletions.
7 changes: 3 additions & 4 deletions zerver/data_import/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,13 @@ def get_email(cls: Type[SlackBotEmailT], user_profile: ZerverFieldsT, domain_nam
else:
raise AssertionError("Could not identify bot type")

email = slack_bot_name.replace("Bot", "").replace(" ", "") + f"-bot@{domain_name}"
email = slack_bot_name.replace("Bot", "").replace(" ", "").lower() + f"-bot@{domain_name}"

if email in cls.duplicate_email_count:
cls.duplicate_email_count[email] += 1
email_prefix, email_suffix = email.split("@")
email_prefix += cls.duplicate_email_count[email]
email_prefix += "-" + str(cls.duplicate_email_count[email])
email = "@".join([email_prefix, email_suffix])
# Increment the duplicate email count
cls.duplicate_email_count[email] += 1
else:
cls.duplicate_email_count[email] = 1

Expand Down
36 changes: 36 additions & 0 deletions zerver/tests/test_slack_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
AddedChannelsT,
AddedMPIMsT,
DMMembersT,
SlackBotEmail,
channel_message_to_zerver_message,
channels_to_zerver_stream,
convert_slack_workspace_messages,
Expand Down Expand Up @@ -1213,3 +1214,38 @@ def test_message_files(self) -> None:
self.assertEqual(uploads_list[0]["s3_path"], image_path)
self.assertEqual(uploads_list[0]["realm_id"], realm_id)
self.assertEqual(uploads_list[0]["user_profile_email"], "[email protected]")

def test_bot_duplicates(self) -> None:
self.assertEqual(
SlackBotEmail.get_email(
{"real_name_normalized": "Real Bot", "bot_id": "foo"}, "example.com"
),
"[email protected]",
)

# SlackBotEmail keeps state -- doing it again appends a "2", "3", etc
self.assertEqual(
SlackBotEmail.get_email(
{"real_name_normalized": "Real Bot", "bot_id": "bar"}, "example.com"
),
"[email protected]",
)
self.assertEqual(
SlackBotEmail.get_email(
{"real_name_normalized": "Real Bot", "bot_id": "baz"}, "example.com"
),
"[email protected]",
)

# But caches based on the bot_id
self.assertEqual(
SlackBotEmail.get_email(
{"real_name_normalized": "Real Bot", "bot_id": "foo"}, "example.com"
),
"[email protected]",
)

self.assertEqual(
SlackBotEmail.get_email({"first_name": "Other Name", "bot_id": "other"}, "example.com"),
"[email protected]",
)

0 comments on commit 2e50ead

Please sign in to comment.