Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions Dockerfile
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that the Dockerfile is getting autogenerated with scripts?
https://github.com/apache/airflow/blob/main/Dockerfile#L1634

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so should I remove that ?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so should I remove that ?

I don't know; we need a more experienced person here to give us that information.

Original file line number Diff line number Diff line change
Expand Up @@ -1640,6 +1640,7 @@ set -euo pipefail

readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly RETENTION_MINUTES="${AIRFLOW__LOG_RETENTION_MINUTES:-0}"
readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"

trap "exit" INT TERM
Expand All @@ -1649,17 +1650,15 @@ readonly EVERY=$((FREQUENCY*60))
echo "Cleaning logs every $EVERY seconds"

while true; do
echo "Trimming airflow logs to ${RETENTION} days."
total_retention_minutes=$(( (RETENTION * 1440) + RETENTION_MINUTES ))
echo "Trimming airflow logs older than ${total_retention_minutes} minutes."

find "${DIRECTORY}"/logs \
-type d -name 'lost+found' -prune -o \
-type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
-type f -mmin +"${total_retention_minutes}" -name '*.log' -print0 | \
xargs -0 rm -f || true

find "${DIRECTORY}"/logs -type d -empty -delete || true

seconds=$(( $(date -u +%s) % EVERY))
(( seconds < 1 )) || sleep $((EVERY - seconds - 1))
sleep 1
sleep "${EVERY}"
done
EOF

Expand Down
4 changes: 4 additions & 0 deletions chart/templates/dag-processor/dag-processor-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,10 @@ spec:
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.dagProcessor.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.dagProcessor.logGroomerSidecar.retentionMinutes }}
- name: AIRFLOW__LOG_RETENTION_MINUTES
value: "{{ .Values.dagProcessor.logGroomerSidecar.retentionMinutes }}"
{{- end }}
{{- if .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}"
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/scheduler/scheduler-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ spec:
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.scheduler.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.scheduler.logGroomerSidecar.retentionMinutes }}
- name: AIRFLOW__LOG_RETENTION_MINUTES
value: "{{ .Values.scheduler.logGroomerSidecar.retentionMinutes }}"
{{- end }}
{{- if .Values.scheduler.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.scheduler.logGroomerSidecar.frequencyMinutes }}"
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/triggerer/triggerer-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ spec:
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.triggerer.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.triggerer.logGroomerSidecar.retentionMinutes }}
- name: AIRFLOW__LOG_RETENTION_MINUTES
value: "{{ .Values.triggerer.logGroomerSidecar.retentionMinutes }}"
{{- end }}
{{- if .Values.triggerer.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.triggerer.logGroomerSidecar.frequencyMinutes }}"
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/workers/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,10 @@ spec:
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.workers.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.workers.logGroomerSidecar.retentionMinutes }}
- name: AIRFLOW__LOG_RETENTION_MINUTES
value: "{{ .Values.workers.logGroomerSidecar.retentionMinutes }}"
{{- end }}
{{- if .Values.workers.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.workers.logGroomerSidecar.frequencyMinutes }}"
Expand Down
5 changes: 5 additions & 0 deletions chart/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -13727,6 +13727,11 @@
"type": "integer",
"default": 15
},
"retentionMinutes": {
"description": "Total retention time is retentionDays + retentionMinutes.",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was there a mishap with this commit? ^^ you replaced the description instead of adding :)

"type": "integer",
"default": 0
},
"frequencyMinutes": {
"description": "Number of minutes between attempts to groom the Airflow logs in log groomer sidecar.",
"type": "integer",
Expand Down
23 changes: 23 additions & 0 deletions chart/values.yaml
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,11 @@ workers:
# Number of days to retain logs
retentionDays: 15

# Number of minutes to retain logs.
# This can be used for finer granularity than days.
# Total retention is retentionDays + retentionMinutes.
retentionMinutes: 0
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you considered, instead of adding another parameter (plus all the complexity that comes with it), supporting fractions, e.g. retentionDays: 15.5 to make it every 15 days plus 12h? Or 0.0139 to have it every 20 minutes.

Copy link
Copy Markdown
Contributor

@n-badtke-cg n-badtke-cg Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that - and scrapped the idea immediately :) The usability is too poor, imo.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @n-badtke-cg; I think that at the user level it is harder to define the retention that way.

Copy link
Copy Markdown
Contributor

@n-badtke-cg n-badtke-cg Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also: we need to calculate minutes anyway because the -mtime only takes integers, so we cannot use fractional days with -mtime, we need to use -mmin anyway. Then, we need to catch weird calculations, because -mmin also takes integers only.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also an example: 10 minutes.

fractional days = 10 min / (24 hours * 60 min) = 0.00694444444444444444444444444444...


# Frequency to attempt to groom logs (in minutes)
frequencyMinutes: 15

Expand Down Expand Up @@ -1361,6 +1366,12 @@ scheduler:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15

# Number of minutes to retain logs.
# This can be used for finer granularity than days.
# Total retention is retentionDays + retentionMinutes.
retentionMinutes: 0

# frequency to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
Expand Down Expand Up @@ -2201,6 +2212,12 @@ triggerer:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15

# Number of minutes to retain logs.
# This can be used for finer granularity than days.
# Total retention is retentionDays + retentionMinutes.
retentionMinutes: 0

# frequency to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
Expand Down Expand Up @@ -2427,6 +2444,12 @@ dagProcessor:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15

# Number of minutes to retain logs.
# This can be used for finer granularity than days.
# Total retention is retentionDays + retentionMinutes.
retentionMinutes: 0

# frequency to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
Expand Down
9 changes: 6 additions & 3 deletions scripts/docker/clean-logs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
set -euo pipefail

readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly RETENTION_DAYS="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly RETENTION_MINUTES="${AIRFLOW__LOG_RETENTION_MINUTES:-0}"
readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"

trap "exit" INT TERM
Expand All @@ -30,10 +31,12 @@ readonly EVERY=$((FREQUENCY*60))
echo "Cleaning logs every $EVERY seconds"

while true; do
echo "Trimming airflow logs to ${RETENTION} days."
total_retention_minutes=$(( (RETENTION_DAYS * 1440) + RETENTION_MINUTES ))
echo "Trimming airflow logs older than ${total_retention_minutes} minutes."

find "${DIRECTORY}"/logs \
-type d -name 'lost+found' -prune -o \
-type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
-type f -mmin +"${total_retention_minutes}" -name '*.log' -print0 | \
xargs -0 rm -f || true

find "${DIRECTORY}"/logs -type d -empty -delete || true
Expand Down
Loading