Skip to content

Commit 4ed6a93

Browse files
authored
[Monitor OpenTelemetry Exporter] Add Further Customer Statsbeat Codes (#35004)
### Packages impacted by this PR @azure/monitor-opentelemetry-exporter ### Issues associated with this PR ### Describe the problem that is addressed by this PR We should cover read-only access and client timeout issues in customer statsbeat. ### Are there test cases added in this PR? _(If not, why?)_ Yes ### Checklists - [x] Added impacted package name to the issue description - [ ] Does this PR needs any fixes in the SDK Generator?** _(If so, create an Issue in the [Autorest/typescript](https://github.com/Azure/autorest.typescript) repository and link it here)_ - [x] Added a changelog (if necessary)
1 parent dbba2a3 commit 4ed6a93

6 files changed

Lines changed: 132 additions & 16 deletions

File tree

sdk/monitor/monitor-opentelemetry-exporter/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## 1.0.0-beta.33 ()
44

5+
### Features Added
6+
7+
- Track CLIENT_READONLY and CLIENT_TIMEOUT customer statsbeat.
8+
59
### Bugs Fixed
610

711
- Fix auto-detection of RP environment for azure functions.

sdk/monitor/monitor-opentelemetry-exporter/src/export/statsbeat/customerStatsbeat.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,4 +537,32 @@ export class CustomerStatsbeatMetrics extends StatsbeatMetrics {
537537
}
538538
return TelemetryType.UNKNOWN;
539539
}
540+
541+
/**
542+
* Checks if the given error is a timeout-related error
543+
* @param error - The error to check
544+
* @returns true if the error is timeout-related, false otherwise
545+
*/
546+
public isTimeoutError(error: { code?: string; message?: string }): boolean {
547+
// Check for various timeout error codes that indicate client timeouts
548+
const timeoutErrorCodes = [
549+
"ETIMEDOUT", // Connection timed out
550+
"ESOCKETTIMEDOUT", // Socket timeout
551+
"ECONNRESET", // Connection reset (often due to timeout)
552+
"ENOTFOUND", // DNS lookup failed/timeout
553+
];
554+
555+
if (error && error.code && timeoutErrorCodes.includes(error.code)) {
556+
return true;
557+
}
558+
559+
// Also check if the error message contains timeout-related keywords
560+
if (error && error.message) {
561+
const timeoutKeywords = ["timeout", "timed out", "connection reset"];
562+
const errorMessage = error.message.toLowerCase();
563+
return timeoutKeywords.some((keyword) => errorMessage.includes(keyword));
564+
}
565+
566+
return false;
567+
}
540568
}

sdk/monitor/monitor-opentelemetry-exporter/src/platform/nodejs/baseSender.ts

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { LongIntervalStatsbeatMetrics } from "../../export/statsbeat/longInterva
1212
import type { RestError } from "@azure/core-rest-pipeline";
1313
import {
1414
DropCode,
15+
RetryCode,
1516
MAX_STATSBEAT_FAILURES,
1617
isStatsbeatShutdownStatus,
1718
} from "../../export/statsbeat/types.js";
@@ -259,23 +260,29 @@ export abstract class BaseSender {
259260
this.incrementStatsbeatFailure();
260261
return { code: ExportResultCode.SUCCESS };
261262
}
262-
if (this.isRetriableRestError(restError)) {
263-
if (restError.statusCode && !this.isStatsbeatSender) {
263+
264+
// For retriable REST errors
265+
if (this.isRetriableRestError(restError) && !this.isStatsbeatSender) {
266+
if (this.customerStatsbeatMetrics?.isTimeoutError(restError) && !this.isStatsbeatSender) {
267+
this.customerStatsbeatMetrics?.countRetryItems(
268+
envelopes,
269+
RetryCode.CLIENT_TIMEOUT,
270+
"timeout_exception",
271+
);
272+
diag.error("Request timed out. Error message:", restError.message);
273+
} else if (restError.statusCode) {
264274
this.networkStatsbeatMetrics?.countRetry(restError.statusCode);
265275
this.customerStatsbeatMetrics?.countRetryItems(envelopes, restError.statusCode);
266276
}
267-
if (!this.isStatsbeatSender) {
268-
diag.error(
269-
"Retrying due to transient client side error. Error message:",
270-
restError.message,
271-
);
272-
}
277+
diag.error(
278+
"Retrying due to transient client side error. Error message:",
279+
restError.message,
280+
);
273281
return this.persist(envelopes);
274282
}
283+
// For non-retriable REST errors or client exceptions
275284
if (!this.isStatsbeatSender) {
276285
this.networkStatsbeatMetrics?.countException(restError);
277-
}
278-
if (!this.isStatsbeatSender) {
279286
diag.error(
280287
"Envelopes could not be exported and are not retriable. Error message:",
281288
restError.message,

sdk/monitor/monitor-opentelemetry-exporter/src/platform/nodejs/persist/fileSystemPersist.ts

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import { confirmDirExists, getShallowDirectorySize } from "./fileSystemHelpers.j
1010
import type { AzureMonitorExporterOptions } from "../../../config.js";
1111
import { readdir, readFile, stat, unlink, writeFile } from "node:fs/promises";
1212
import type { CustomerStatsbeatMetrics } from "../../../export/statsbeat/customerStatsbeat.js";
13-
import { DropCode, TelemetryType } from "../../../export/statsbeat/types.js";
13+
import { DropCode } from "../../../export/statsbeat/types.js";
1414
import type { TelemetryItem as Envelope } from "../../../generated/index.js";
1515

1616
/**
@@ -152,7 +152,16 @@ export class FileSystemPersist implements PersistentStorage {
152152
try {
153153
await confirmDirExists(this._tempDirectory);
154154
} catch (error: any) {
155-
diag.warn(`Error while checking/creating directory: `, error && error.message);
155+
// Check if error is due to permission/readonly issues
156+
if (error?.code === "EACCES" || error?.code === "EPERM") {
157+
this._customerStatsbeatMetrics?.countDroppedItems(envelopes, DropCode.CLIENT_READONLY);
158+
diag.warn(
159+
`Permission denied while checking/creating directory: ${this._tempDirectory}`,
160+
error?.message,
161+
);
162+
} else {
163+
diag.warn(`Error while checking/creating directory: `, error && error.message);
164+
}
156165
return false;
157166
}
158167

@@ -163,7 +172,6 @@ export class FileSystemPersist implements PersistentStorage {
163172
this._customerStatsbeatMetrics?.countDroppedItems(
164173
envelopes,
165174
DropCode.CLIENT_PERSISTENCE_CAPACITY,
166-
TelemetryType.UNKNOWN,
167175
);
168176
diag.warn(
169177
`Not saving data due to max size limit being met. Directory size in bytes is: ${size}`,
@@ -186,8 +194,8 @@ export class FileSystemPersist implements PersistentStorage {
186194
// If the envelopes cannot be written to disk, we send customer statsbeat and warn the user
187195
this._customerStatsbeatMetrics?.countDroppedItems(
188196
envelopes,
189-
DropCode.CLIENT_STORAGE_DISABLED,
190-
TelemetryType.UNKNOWN,
197+
DropCode.CLIENT_EXCEPTION,
198+
writeError?.message,
191199
);
192200
diag.warn(`Error writing file to persistent file storage`, writeError);
193201
return false;

sdk/monitor/monitor-opentelemetry-exporter/test/internal/baseSender.spec.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ export const mockCustomerStatsbeatMetrics = {
9696
countSuccessfulItems: vi.fn(),
9797
countDroppedItems: vi.fn(),
9898
countRetryItems: vi.fn(),
99+
isTimeoutError: vi.fn(),
99100
shutdown: vi.fn(),
100101
};
101102

@@ -488,6 +489,9 @@ describe("BaseSender", () => {
488489
const nonRetriableError: any = new Error("Bad request");
489490
nonRetriableError.statusCode = 400;
490491

492+
// Mock isTimeoutError to return false so timeout logic isn't triggered
493+
mockCustomerStatsbeatMetrics.isTimeoutError.mockReturnValue(false);
494+
491495
sender.sendMock.mockRejectedValue(nonRetriableError);
492496

493497
const result = await sender.exportEnvelopes([{ name: "test", time: new Date() }]);

sdk/monitor/monitor-opentelemetry-exporter/test/internal/fileSystemPersist.spec.ts

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
// Copyright (c) Microsoft Corporation.
22
// Licensed under the MIT License.
33

4+
import { describe, it, assert, expect, beforeEach, vi, afterEach } from "vitest";
5+
46
import * as fs from "node:fs";
57
import * as os from "node:os";
68
import * as path from "node:path";
79
import { FileSystemPersist } from "../../src/platform/nodejs/persist/fileSystemPersist.js";
810
import type { TelemetryItem as Envelope } from "../../src/generated/index.js";
911
import { promisify } from "node:util";
1012
import { FileAccessControl } from "../../src/platform/nodejs/persist/fileAccessControl.js";
11-
import { describe, it, assert, expect, beforeEach } from "vitest";
13+
import { DropCode } from "../../src/export/statsbeat/types.js";
1214

1315
const statAsync = promisify(fs.stat);
1416
const readdirAsync = promisify(fs.readdir);
@@ -199,4 +201,67 @@ describe("FileSystemPersist", () => {
199201
assert.deepStrictEqual(fileValue, null, "File is still present"); // File doesn't exist anymore
200202
});
201203
});
204+
205+
describe("#CLIENT_READONLY scenarios", () => {
206+
let mockCustomerStatsbeat: any;
207+
let originalOSFileProtection: boolean;
208+
209+
beforeEach(() => {
210+
mockCustomerStatsbeat = {
211+
countDroppedItems: vi.fn(),
212+
};
213+
214+
// Store original value and enable file protection for tests
215+
originalOSFileProtection = FileAccessControl.OS_PROVIDES_FILE_PROTECTION;
216+
FileAccessControl.OS_PROVIDES_FILE_PROTECTION = true;
217+
});
218+
219+
afterEach(() => {
220+
vi.restoreAllMocks();
221+
// Restore original value
222+
FileAccessControl.OS_PROVIDES_FILE_PROTECTION = originalOSFileProtection;
223+
});
224+
225+
it("should have CLIENT_READONLY logic implemented", () => {
226+
const persister = new FileSystemPersist(instrumentationKey, {}, mockCustomerStatsbeat);
227+
expect(persister).toBeDefined();
228+
expect(DropCode.CLIENT_READONLY).toBeDefined();
229+
230+
// Verify that our mock is properly set up
231+
expect(mockCustomerStatsbeat.countDroppedItems).toBeDefined();
232+
});
233+
234+
it("should track CLIENT_READONLY when permission errors occur", async () => {
235+
const envelope: Envelope = {
236+
name: "test",
237+
time: new Date(),
238+
};
239+
240+
// Import the module dynamically to mock before usage
241+
const helpersMod = await import("../../src/platform/nodejs/persist/fileSystemHelpers.js");
242+
243+
// Mock confirmDirExists to throw EACCES permission error
244+
const error = new Error("EACCES: permission denied, mkdir") as NodeJS.ErrnoException;
245+
error.code = "EACCES";
246+
const mockConfirmDirExists = vi
247+
.spyOn(helpersMod, "confirmDirExists")
248+
.mockRejectedValue(error);
249+
250+
const persister = new FileSystemPersist(instrumentationKey, {}, mockCustomerStatsbeat);
251+
252+
const result = await persister.push([envelope]);
253+
254+
// Should return false due to permission error
255+
expect(result).toBe(false);
256+
257+
// Should have called countDroppedItems with CLIENT_READONLY
258+
expect(mockCustomerStatsbeat.countDroppedItems).toHaveBeenCalledWith(
259+
[envelope],
260+
DropCode.CLIENT_READONLY,
261+
);
262+
263+
// Restore the spy
264+
mockConfirmDirExists.mockRestore();
265+
});
266+
});
202267
});

0 commit comments

Comments
 (0)