Skip to content

Commit

Permalink
POC delete stale non-e2e devices for users (matrix-org#14038)
Browse files Browse the repository at this point in the history
This should help reduce the number of devices e.g. simple bots the repeatedly login rack up.

We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message.

Co-authored-by: Patrick Cloke <[email protected]>
Co-authored-by: Sean Quah <[email protected]>
  • Loading branch information
3 people authored Nov 29, 2022
1 parent 72f3e38 commit c7e29ca
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 4 deletions.
1 change: 1 addition & 0 deletions changelog.d/14038.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Prune user's old devices on login if they have too many.
13 changes: 12 additions & 1 deletion synapse/handlers/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,9 @@ async def check_device_registered(

self._check_device_name_length(initial_device_display_name)

# Prune the user's device list if they already have a lot of devices.
await self._prune_too_many_devices(user_id)

if device_id is not None:
new_device = await self.store.store_device(
user_id=user_id,
Expand Down Expand Up @@ -452,6 +455,14 @@ async def check_device_registered(

raise errors.StoreError(500, "Couldn't generate a device ID.")

async def _prune_too_many_devices(self, user_id: str) -> None:
"""Delete any excess old devices this user may have."""
device_ids = await self.store.check_too_many_devices_for_user(user_id)
if not device_ids:
return

await self.delete_devices(user_id, device_ids)

async def _delete_stale_devices(self) -> None:
"""Background task that deletes devices which haven't been accessed for more than
a configured time period.
Expand Down Expand Up @@ -481,7 +492,7 @@ async def delete_all_devices_for_user(
device_ids = [d for d in device_ids if d != except_device_id]
await self.delete_devices(user_id, device_ids)

async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
"""Delete several devices
Args:
Expand Down
67 changes: 66 additions & 1 deletion synapse/storage/databases/main/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1533,6 +1533,70 @@ def _txn(txn: LoggingTransaction) -> int:

return rows

async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]:
"""Check if the user has a lot of devices, and if so return the set of
devices we can prune.
This does *not* return hidden devices or devices with E2E keys.
"""

num_devices = await self.db_pool.simple_select_one_onecol(
table="devices",
keyvalues={"user_id": user_id, "hidden": False},
retcol="COALESCE(COUNT(*), 0)",
desc="count_devices",
)

# We let users have up to ten devices without pruning.
if num_devices <= 10:
return ()

# We prune everything older than N days.
max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000

if num_devices > 50:
# If the user has more than 50 devices, then we chose a last seen
# that ensures we keep at most 50 devices.
sql = """
SELECT last_seen FROM devices
WHERE
user_id = ?
AND NOT hidden
AND last_seen IS NOT NULL
AND key_json IS NULL
ORDER BY last_seen DESC
LIMIT 1
OFFSET 50
"""

rows = await self.db_pool.execute(
"check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
)
if rows:
max_last_seen = max(rows[0][0], max_last_seen)

# Now fetch the devices to delete.
sql = """
SELECT DISTINCT device_id FROM devices
LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
WHERE
user_id = ?
AND NOT hidden
AND last_seen < ?
AND key_json IS NULL
"""

def check_too_many_devices_for_user_txn(
txn: LoggingTransaction,
) -> Collection[str]:
txn.execute(sql, (user_id, max_last_seen))
return {device_id for device_id, in txn}

return await self.db_pool.runInteraction(
"check_too_many_devices_for_user",
check_too_many_devices_for_user_txn,
)


class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
# Because we have write access, this will be a StreamIdGenerator
Expand Down Expand Up @@ -1591,6 +1655,7 @@ async def store_device(
values={},
insertion_values={
"display_name": initial_device_display_name,
"last_seen": self._clock.time_msec(),
"hidden": False,
},
desc="store_device",
Expand Down Expand Up @@ -1636,7 +1701,7 @@ async def store_device(
)
raise StoreError(500, "Problem storing device.")

async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
"""Deletes several devices.
Args:
Expand Down
2 changes: 1 addition & 1 deletion tests/handlers/test_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_get_devices_by_user(self) -> None:
"device_id": "xyz",
"display_name": "display 0",
"last_seen_ip": None,
"last_seen_ts": None,
"last_seen_ts": 1000000,
},
device_map["xyz"],
)
Expand Down
4 changes: 3 additions & 1 deletion tests/storage/test_client_ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool):
)
)

last_seen = self.clock.time_msec()

if after_persisting:
# Trigger the storage loop
self.reactor.advance(10)
Expand All @@ -189,7 +191,7 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool):
"device_id": device_id,
"ip": None,
"user_agent": None,
"last_seen": None,
"last_seen": last_seen,
},
],
)
Expand Down

0 comments on commit c7e29ca

Please sign in to comment.