Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from django.db import migrations

from firetower.incidents.tasks import SCHEDULES


def create_schedule(apps, schema_editor):
Schedule = apps.get_model("django_q", "Schedule")
schedule_name = "archive_stale_channels"
Schedule.objects.get_or_create(
name=schedule_name, defaults=SCHEDULES[schedule_name]
)


def delete_schedule(apps, schema_editor):
Schedule = apps.get_model("django_q", "Schedule")
schedule_name = "archive_stale_channels"
Schedule.objects.filter(name=schedule_name).delete()


class Migration(migrations.Migration):
dependencies = [
("incidents", "0018_add_action_item_model"),
("django_q", "0018_task_success_index"),
]

operations = [
migrations.RunPython(create_schedule, delete_schedule),
]
117 changes: 114 additions & 3 deletions src/firetower/incidents/tasks.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
import functools
import logging
import re
import time
from typing import Protocol

from datadog import statsd
from django_q.tasks import Schedule

from firetower.incidents.models import Incident
from firetower.incidents.models import ExternalLink, ExternalLinkType, Incident
from firetower.integrations.services.slack import SlackService

SCHEDULES = {
"schedule_demo": {
"func": "firetower.incidents.tasks.schedule_demo",
"schedule_type": Schedule.MINUTES, # Minutes
"schedule_type": Schedule.MINUTES,
"minutes": 5,
"repeats": -1, # repeat indefinitely
"repeats": -1,
},
"archive_stale_channels": {
"func": "firetower.incidents.tasks.archive_stale_channels",

Check failure on line 21 in src/firetower/incidents/tasks.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[5U2-CS4] get_channel_history(limit=5) silently misses human messages beyond the first 5, causing incorrect archival (additional location)

Only the 5 most recent messages are checked; if a channel has human messages ranked 6th or later (all within the retention window), none of them are seen and the channel is incorrectly archived. Use `limit=None` to paginate all remaining messages, or explicitly accept this constraint and document it.
"schedule_type": Schedule.DAILY,
Comment thread
github-actions[bot] marked this conversation as resolved.
"repeats": -1,
Comment thread
github-actions[bot] marked this conversation as resolved.
Comment thread
github-actions[bot] marked this conversation as resolved.
},

Check warning on line 24 in src/firetower/incidents/tasks.py

View check run for this annotation

@sentry/warden / warden: code-review

[CVQ-NTN] limit=5 history window can cause active channels to be incorrectly archived (additional location)

`archive_stale_channels` decides staleness from only the 5 most recent messages and then filters out own-bot posts. A channel where the latest 5 messages happen to be Firetower's own bot posts (e.g. status/automation updates) but human discussion exists older than position 5 will produce an empty `non_own_messages` list and be archived. The PR description states the intent is to archive only when retention policy has removed all messages (0 remaining); consider using `limit=1` and treating any returned message as proof of activity, or paginating fully before filtering.
}

Expand Down Expand Up @@ -60,6 +67,110 @@
return wrapper


ARCHIVE_NOTICE = (
"This channel is being archived by Firetower because all message history "
"has been removed by the workspace retention policy and there doesn't "
"appear to be any active discussions."
)

ARCHIVE_CHANNEL_DELAY_SECONDS = 2


@datadog_log
def archive_stale_channels() -> None:
slack = SlackService()
if not slack.client:
logger.error(
"Slack client not initialized -- disabling archive_stale_channels schedule"
)
Schedule.objects.filter(name="archive_stale_channels").update(repeats=0)
return

own_bot_id = slack.bot_id

links = ExternalLink.objects.filter(type=ExternalLinkType.SLACK).select_related(
Comment thread
rgibert marked this conversation as resolved.
"incident"
)

scanned = 0
archived = 0
skipped = 0
errored = 0

for i, link in enumerate(links):
if i > 0:
time.sleep(ARCHIVE_CHANNEL_DELAY_SECONDS)

scanned += 1
channel_id = slack.parse_channel_id_from_url(link.url)
if not channel_id:
skipped += 1
continue

try:
info = slack.get_channel_info(channel_id)
if info is None:
logger.warning(
f"Could not fetch info for channel {channel_id} "
f"(incident {link.incident.incident_number}), skipping"
)
skipped += 1
continue

if info.get("is_archived"):
skipped += 1
continue

Check failure on line 122 in src/firetower/incidents/tasks.py

View check run for this annotation

@sentry/warden / warden: find-bugs

get_channel_history(limit=5) silently misses human messages beyond the first 5, causing incorrect archival

Only the 5 most recent messages are checked; if a channel has human messages ranked 6th or later (all within the retention window), none of them are seen and the channel is incorrectly archived. Use `limit=None` to paginate all remaining messages, or explicitly accept this constraint and document it.

messages = slack.get_channel_history(channel_id, limit=5)
non_own_messages = [
msg
for msg in messages
if msg.get("bot_id") != own_bot_id or not own_bot_id
]

Check warning on line 129 in src/firetower/incidents/tasks.py

View check run for this annotation

@sentry/warden / warden: code-review

[CVQ-NTN] limit=5 history window can cause active channels to be incorrectly archived (additional location)

`archive_stale_channels` decides staleness from only the 5 most recent messages and then filters out own-bot posts. A channel where the latest 5 messages happen to be Firetower's own bot posts (e.g. status/automation updates) but human discussion exists older than position 5 will produce an empty `non_own_messages` list and be archived. The PR description states the intent is to archive only when retention policy has removed all messages (0 remaining); consider using `limit=1` and treating any returned message as proof of activity, or paginating fully before filtering.
if non_own_messages:
skipped += 1
continue

notice_ts = slack.post_message(channel_id, ARCHIVE_NOTICE)
Comment thread
github-actions[bot] marked this conversation as resolved.
if not notice_ts:
logger.error(
f"Failed to post archive notice to channel {channel_id} "
f"(incident {link.incident.incident_number}), skipping archive"
)
errored += 1
continue

try:
if not slack.archive_channel(channel_id):
raise RuntimeError(
f"archive_channel returned False for {channel_id}"
)
archived += 1
logger.info(
f"Archived stale channel {channel_id} "
f"(incident {link.incident.incident_number})"
)
except Exception:
errored += 1
logger.exception(
f"Failed to archive channel {channel_id} "
f"(incident {link.incident.incident_number}), "
f"deleting notice"
)
slack.delete_message(channel_id, notice_ts)
except Exception:
errored += 1
logger.exception(
f"Error processing channel {channel_id} "
f"(incident {link.incident.incident_number})"
)

logger.info(
f"archive_stale_channels complete: "
f"scanned={scanned} archived={archived} skipped={skipped} errored={errored}"
)


@datadog_log
def schedule_demo() -> None:
incident = Incident.objects.order_by("-created_at").first()
Expand Down
Loading
Loading