Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions infrastructure/terraform/components/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ No requirements.

| Name | Source | Version |
|------|--------|---------|
| <a name="module_apigw_alarms"></a> [apigw\_alarms](#module\_apigw\_alarms) | ../../modules/alarms/alarms-apigw | n/a |
| <a name="module_authorizer_lambda"></a> [authorizer\_lambda](#module\_authorizer\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| <a name="module_ddb_alarms_letters"></a> [ddb\_alarms\_letters](#module\_ddb\_alarms\_letters) | ../../modules/alarms/alarms-ddb | n/a |
| <a name="module_ddb_alarms_mi"></a> [ddb\_alarms\_mi](#module\_ddb\_alarms\_mi) | ../../modules/alarms/alarms-ddb | n/a |
| <a name="module_ddb_alarms_suppliers"></a> [ddb\_alarms\_suppliers](#module\_ddb\_alarms\_suppliers) | ../../modules/alarms/alarms-ddb | n/a |
| <a name="module_domain_truststore"></a> [domain\_truststore](#module\_domain\_truststore) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-s3bucket.zip | n/a |
| <a name="module_eventpub"></a> [eventpub](#module\_eventpub) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-eventpub.zip | n/a |
| <a name="module_eventsub"></a> [eventsub](#module\_eventsub) | ../../modules/eventsub | n/a |
Expand All @@ -51,6 +55,7 @@ No requirements.
| <a name="module_get_letters"></a> [get\_letters](#module\_get\_letters) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| <a name="module_get_status"></a> [get\_status](#module\_get\_status) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| <a name="module_kms"></a> [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-kms.zip | n/a |
| <a name="module_lambda_alarms"></a> [lambda\_alarms](#module\_lambda\_alarms) | ../../modules/alarms/alarms-lambda | n/a |
| <a name="module_letter_status_update"></a> [letter\_status\_update](#module\_letter\_status\_update) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| <a name="module_letter_status_updates_queue"></a> [letter\_status\_updates\_queue](#module\_letter\_status\_updates\_queue) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a |
| <a name="module_letter_updates_transformer"></a> [letter\_updates\_transformer](#module\_letter\_updates\_transformer) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
Expand All @@ -60,6 +65,7 @@ No requirements.
| <a name="module_post_letters"></a> [post\_letters](#module\_post\_letters) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| <a name="module_post_mi"></a> [post\_mi](#module\_post\_mi) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| <a name="module_s3bucket_test_letters"></a> [s3bucket\_test\_letters](#module\_s3bucket\_test\_letters) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-s3bucket.zip | n/a |
| <a name="module_sqs_alarms"></a> [sqs\_alarms](#module\_sqs\_alarms) | ../../modules/alarms/alarms-sqs | n/a |
| <a name="module_sqs_letter_updates"></a> [sqs\_letter\_updates](#module\_sqs\_letter\_updates) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-sqs.zip | n/a |
| <a name="module_supplier_ssl"></a> [supplier\_ssl](#module\_supplier\_ssl) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-ssl.zip | n/a |
| <a name="module_upsert_letter"></a> [upsert\_letter](#module\_upsert\_letter) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
Expand Down
77 changes: 77 additions & 0 deletions infrastructure/terraform/components/api/alarms.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
locals {
  # Lambda functions that receive alarms. Each key is a stable logical name
  # (it becomes the for_each instance key of module.lambda_alarms); each value
  # is the function name exported by the corresponding lambda module.
  lambda_alarm_targets = {
    authorizer_lambda          = module.authorizer_lambda.function_name
    get_letter                 = module.get_letter.function_name
    get_letter_data            = module.get_letter_data.function_name
    get_letters                = module.get_letters.function_name
    get_status                 = module.get_status.function_name
    letter_status_update       = module.letter_status_update.function_name
    letter_updates_transformer = module.letter_updates_transformer.function_name
    mi_updates_transformer     = module.mi_updates_transformer.function_name
    patch_letter               = module.patch_letter.function_name
    post_letters               = module.post_letters.function_name
    post_mi                    = module.post_mi.function_name
    upsert_letter              = module.upsert_letter.function_name
  }

  # SQS queues that receive alarms, keyed by logical name. Each entry carries
  # the queue name and the age_period_seconds value handed to the alarms-sqs
  # module for that queue.
  sqs_queue_names = {
    letter_status_updates = {
      age_period_seconds = 300
      name               = module.letter_status_updates_queue.sqs_queue_name
    }
    letter_updates = {
      age_period_seconds = 900
      name               = module.sqs_letter_updates.sqs_queue_name
    }
  }
}

# Instantiates the alarms-lambda module once per entry in
# local.lambda_alarm_targets.
module "lambda_alarms" {
  source   = "../../modules/alarms/alarms-lambda"
  for_each = local.lambda_alarm_targets

  function_name = each.value
  # Log group path is derived from the function name.
  # NOTE(review): assumes every function logs to /aws/lambda/<function name> — confirm.
  log_group_name = "/aws/lambda/${each.value}"

  alarm_prefix = local.csi
  tags         = local.default_tags
}

# DynamoDB alarms for the letters table.
module "ddb_alarms_letters" {
  source = "../../modules/alarms/alarms-ddb"

  table_name   = aws_dynamodb_table.letters.name
  alarm_prefix = local.csi
  tags         = local.default_tags
}

# DynamoDB alarms for the mi table.
module "ddb_alarms_mi" {
  source = "../../modules/alarms/alarms-ddb"

  table_name   = aws_dynamodb_table.mi.name
  alarm_prefix = local.csi
  tags         = local.default_tags
}

# DynamoDB alarms for the suppliers table.
module "ddb_alarms_suppliers" {
  source = "../../modules/alarms/alarms-ddb"

  table_name   = aws_dynamodb_table.suppliers.name
  alarm_prefix = local.csi
  tags         = local.default_tags
}

# Instantiates the alarms-sqs module once per entry in local.sqs_queue_names,
# passing the queue name, the derived dead-letter queue name and the per-queue
# age period.
module "sqs_alarms" {
  for_each = local.sqs_queue_names
  source   = "../../modules/alarms/alarms-sqs"

  alarm_prefix = local.csi
  queue_name   = each.value.name

  # Derive the DLQ name by swapping only the trailing "-queue" suffix for
  # "-dlq". The pattern is wrapped in slashes so replace() treats it as a
  # regular expression, and "$" anchors it to the end of the name; a plain
  # substring replace would also rewrite any earlier "-queue" inside the name.
  # NOTE(review): assumes the SQS module names its DLQ "<base>-dlq" alongside
  # "<base>-queue" — confirm against the module, or prefer a DLQ name output
  # if the module exposes one.
  dlq_queue_name = replace(each.value.name, "/-queue$/", "-dlq")

  age_period_seconds = each.value.age_period_seconds
  tags               = local.default_tags
}

# API Gateway alarms for the main REST API and its stage.
module "apigw_alarms" {
  source = "../../modules/alarms/alarms-apigw"

  api_name     = aws_api_gateway_rest_api.main.name
  stage_name   = aws_api_gateway_stage.main.stage_name
  alarm_prefix = local.csi
  tags         = local.default_tags
}
19 changes: 19 additions & 0 deletions infrastructure/terraform/modules/alarms/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!-- BEGIN_TF_DOCS -->
<!-- markdownlint-disable -->
<!-- vale off -->

## Requirements

No requirements.
## Inputs

No inputs.
## Modules

No modules.
## Outputs

No outputs.
<!-- vale on -->
<!-- markdownlint-enable -->
<!-- END_TF_DOCS -->
34 changes: 34 additions & 0 deletions infrastructure/terraform/modules/alarms/alarms-apigw/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<!-- BEGIN_TF_DOCS -->
<!-- markdownlint-disable -->
<!-- vale off -->

## Requirements

| Name | Version |
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.9.0 |
## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_alarm_prefix"></a> [alarm\_prefix](#input\_alarm\_prefix) | n/a | `string` | n/a | yes |
| <a name="input_api_name"></a> [api\_name](#input\_api\_name) | n/a | `string` | n/a | yes |
| <a name="input_error_5xx_evaluation_periods"></a> [error\_5xx\_evaluation\_periods](#input\_error\_5xx\_evaluation\_periods) | n/a | `number` | `1` | no |
| <a name="input_error_5xx_period_seconds"></a> [error\_5xx\_period\_seconds](#input\_error\_5xx\_period\_seconds) | n/a | `number` | `60` | no |
| <a name="input_error_5xx_threshold"></a> [error\_5xx\_threshold](#input\_error\_5xx\_threshold) | n/a | `number` | `0` | no |
| <a name="input_latency_anomaly_sensitivity"></a> [latency\_anomaly\_sensitivity](#input\_latency\_anomaly\_sensitivity) | n/a | `number` | `2` | no |
| <a name="input_latency_datapoints_to_alarm"></a> [latency\_datapoints\_to\_alarm](#input\_latency\_datapoints\_to\_alarm) | n/a | `number` | `3` | no |
| <a name="input_latency_evaluation_periods"></a> [latency\_evaluation\_periods](#input\_latency\_evaluation\_periods) | n/a | `number` | `5` | no |
| <a name="input_latency_period_seconds"></a> [latency\_period\_seconds](#input\_latency\_period\_seconds) | n/a | `number` | `60` | no |
| <a name="input_latency_threshold_ms"></a> [latency\_threshold\_ms](#input\_latency\_threshold\_ms) | n/a | `number` | `29000` | no |
| <a name="input_stage_name"></a> [stage\_name](#input\_stage\_name) | n/a | `string` | n/a | yes |
| <a name="input_tags"></a> [tags](#input\_tags) | n/a | `map(string)` | `{}` | no |
## Modules

No modules.
## Outputs

No outputs.
<!-- vale on -->
<!-- markdownlint-enable -->
<!-- END_TF_DOCS -->
87 changes: 87 additions & 0 deletions infrastructure/terraform/modules/alarms/alarms-apigw/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
locals {
  # CloudWatch dimensions shared by every alarm in this module, so all of them
  # target the same API name and stage.
  api_dimensions = {
    Stage   = var.stage_name
    ApiName = var.api_name
  }
}

# Alarm on the summed 5XXError metric for the API/stage exceeding
# var.error_5xx_threshold within a period.
resource "aws_cloudwatch_metric_alarm" "five_xx" {
  alarm_name        = "${var.alarm_prefix}-apigw-5xx"
  alarm_description = "RELIABILITY: API Gateway 5xx responses"

  # Metric under observation.
  metric_name = "5XXError"
  namespace   = "AWS/ApiGateway"
  dimensions  = local.api_dimensions
  statistic   = "Sum"
  period      = var.error_5xx_period_seconds

  # Breach condition; periods with no data are treated as not breaching.
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.error_5xx_threshold
  evaluation_periods  = var.error_5xx_evaluation_periods
  treat_missing_data  = "notBreaching"

  # Actions are disabled and no targets are configured for any state.
  actions_enabled           = false
  alarm_actions             = []
  insufficient_data_actions = []
  ok_actions                = []

  tags = var.tags
}

# Alarm on the average Latency metric for the API/stage exceeding
# var.latency_threshold_ms.
# NOTE(review): with the default threshold of 29000 and statistic "Average",
# this looks unlikely to fire in practice — confirm the intended threshold.
# NOTE(review): var.latency_datapoints_to_alarm is not applied here (only the
# anomaly alarm uses it) — confirm that is intended.
resource "aws_cloudwatch_metric_alarm" "latency_threshold" {
  alarm_name        = "${var.alarm_prefix}-apigw-latency-threshold"
  alarm_description = "RELIABILITY: API Gateway latency above threshold"

  # Metric under observation.
  metric_name = "Latency"
  namespace   = "AWS/ApiGateway"
  dimensions  = local.api_dimensions
  statistic   = "Average"
  period      = var.latency_period_seconds

  # Breach condition; periods with no data are treated as not breaching.
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.latency_threshold_ms
  evaluation_periods  = var.latency_evaluation_periods
  treat_missing_data  = "notBreaching"

  # Actions are disabled and no targets are configured for any state.
  actions_enabled           = false
  alarm_actions             = []
  insufficient_data_actions = []
  ok_actions                = []

  tags = var.tags
}

# Alarm on average Latency rising above the upper edge of the anomaly
# detection band computed from the same metric.
resource "aws_cloudwatch_metric_alarm" "latency_anomaly" {
  alarm_name        = "${var.alarm_prefix}-apigw-latency-anomaly"
  alarm_description = "RELIABILITY: API Gateway latency anomaly"

  # Compare the metric against the band produced by metric query "ad1".
  threshold_metric_id = "ad1"
  comparison_operator = "GreaterThanUpperThreshold"
  datapoints_to_alarm = var.latency_datapoints_to_alarm
  evaluation_periods  = var.latency_evaluation_periods
  treat_missing_data  = "notBreaching"

  # Actions are disabled and no targets are configured for any state.
  actions_enabled           = false
  alarm_actions             = []
  insufficient_data_actions = []
  ok_actions                = []

  tags = var.tags

  # m1: the raw average latency for the API/stage.
  metric_query {
    id          = "m1"
    return_data = true

    metric {
      namespace   = "AWS/ApiGateway"
      metric_name = "Latency"
      dimensions  = local.api_dimensions
      stat        = "Average"
      period      = var.latency_period_seconds
    }
  }

  # ad1: the anomaly detection band over m1; the second argument is
  # var.latency_anomaly_sensitivity.
  metric_query {
    id          = "ad1"
    label       = "Latency (expected)"
    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.latency_anomaly_sensitivity})"
    return_data = true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# ---------------------------------------------------------------------------
# Required inputs
# ---------------------------------------------------------------------------

# Prefix prepended to every alarm name created by this module.
variable "alarm_prefix" {
  type = string
}

# Value of the ApiName dimension on every alarm.
variable "api_name" {
  type = string
}

# Value of the Stage dimension on every alarm.
variable "stage_name" {
  type = string
}

# ---------------------------------------------------------------------------
# Optional inputs
# ---------------------------------------------------------------------------

# Tags applied to every alarm.
variable "tags" {
  default = {}
  type    = map(string)
}

# 5xx alarm: fires when the summed 5XXError count in a period is greater than
# this value. The default of 0 means any 5xx response breaches.
variable "error_5xx_threshold" {
  default = 0
  type    = number
}

# 5xx alarm: metric period, in seconds.
variable "error_5xx_period_seconds" {
  default = 60
  type    = number
}

# 5xx alarm: number of periods evaluated.
variable "error_5xx_evaluation_periods" {
  default = 1
  type    = number
}

# Latency threshold alarm: fires when average Latency is greater than this
# value.
variable "latency_threshold_ms" {
  default = 29000
  type    = number
}

# Latency alarms (threshold and anomaly): metric period, in seconds.
variable "latency_period_seconds" {
  default = 60
  type    = number
}

# Latency alarms (threshold and anomaly): number of periods evaluated.
variable "latency_evaluation_periods" {
  default = 5
  type    = number
}

# Latency anomaly alarm only: datapoints_to_alarm for the alarm.
variable "latency_datapoints_to_alarm" {
  default = 3
  type    = number
}

# Latency anomaly alarm only: second argument passed to
# ANOMALY_DETECTION_BAND.
variable "latency_anomaly_sensitivity" {
  default = 2
  type    = number
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

# Terraform and provider requirements for this module.
terraform {
  required_version = ">= 1.9.0"

  # The AWS provider is required; no version constraint is set here.
  required_providers {
    aws = {
      source = "hashicorp/aws"
    }
  }
}
29 changes: 29 additions & 0 deletions infrastructure/terraform/modules/alarms/alarms-ddb/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<!-- BEGIN_TF_DOCS -->
<!-- markdownlint-disable -->
<!-- vale off -->

## Requirements

| Name | Version |
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.9.0 |
## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_alarm_prefix"></a> [alarm\_prefix](#input\_alarm\_prefix) | n/a | `string` | n/a | yes |
| <a name="input_evaluation_periods"></a> [evaluation\_periods](#input\_evaluation\_periods) | n/a | `number` | `1` | no |
| <a name="input_period_seconds"></a> [period\_seconds](#input\_period\_seconds) | n/a | `number` | `60` | no |
| <a name="input_read_throttle_threshold"></a> [read\_throttle\_threshold](#input\_read\_throttle\_threshold) | n/a | `number` | `0` | no |
| <a name="input_table_name"></a> [table\_name](#input\_table\_name) | n/a | `string` | n/a | yes |
| <a name="input_tags"></a> [tags](#input\_tags) | n/a | `map(string)` | `{}` | no |
| <a name="input_write_throttle_threshold"></a> [write\_throttle\_threshold](#input\_write\_throttle\_threshold) | n/a | `number` | `0` | no |
## Modules

No modules.
## Outputs

No outputs.
<!-- vale on -->
<!-- markdownlint-enable -->
<!-- END_TF_DOCS -->
45 changes: 45 additions & 0 deletions infrastructure/terraform/modules/alarms/alarms-ddb/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Alarm on the summed ReadThrottleEvents metric for the table exceeding
# var.read_throttle_threshold within a period.
resource "aws_cloudwatch_metric_alarm" "read_throttle" {
  alarm_name        = "${var.alarm_prefix}-ddb-${var.table_name}-read-throttle"
  alarm_description = "RELIABILITY: DynamoDB read throttling"

  # Metric under observation, scoped to the table by the TableName dimension.
  metric_name = "ReadThrottleEvents"
  namespace   = "AWS/DynamoDB"
  dimensions  = { TableName = var.table_name }
  statistic   = "Sum"
  period      = var.period_seconds

  # Breach condition; periods with no data are treated as not breaching.
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.read_throttle_threshold
  evaluation_periods  = var.evaluation_periods
  treat_missing_data  = "notBreaching"

  # Actions are disabled and no targets are configured for any state.
  actions_enabled           = false
  alarm_actions             = []
  insufficient_data_actions = []
  ok_actions                = []

  tags = var.tags
}

# Alarm on the summed WriteThrottleEvents metric for the table exceeding
# var.write_throttle_threshold within a period.
resource "aws_cloudwatch_metric_alarm" "write_throttle" {
  alarm_name        = "${var.alarm_prefix}-ddb-${var.table_name}-write-throttle"
  alarm_description = "RELIABILITY: DynamoDB write throttling"

  # Metric under observation, scoped to the table by the TableName dimension.
  metric_name = "WriteThrottleEvents"
  namespace   = "AWS/DynamoDB"
  dimensions  = { TableName = var.table_name }
  statistic   = "Sum"
  period      = var.period_seconds

  # Breach condition; periods with no data are treated as not breaching.
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.write_throttle_threshold
  evaluation_periods  = var.evaluation_periods
  treat_missing_data  = "notBreaching"

  # Actions are disabled and no targets are configured for any state.
  actions_enabled           = false
  alarm_actions             = []
  insufficient_data_actions = []
  ok_actions                = []

  tags = var.tags
}
Loading
Loading