[security]: implement metadata firewall and destructive upsert guardrails#73
[security]: implement metadata firewall and destructive upsert guardrails#73shuvajyotikar13 wants to merge 6 commits intopinecone-io:mainfrom
Conversation
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 2 potential issues.
Autofix Details
Bugbot Autofix prepared fixes for both issues found in the latest run.
- ✅ Fixed: Metadata filtering corrupts matches when both properties exist
- I changed metadata filtering to process
results.recordsandresults.matchesindependently so one array is never overwritten with filtered data from the other.
- I changed metadata filtering to process
- ✅ Fixed: Stale type definitions bypassed with
as anycasts- I added
selectedMetadataKeysandconfirmOverwritetoSearchArgsandUpsertArgsand restored typed argument casts to remove theas anybypass.
- I added
Or push these changes by commenting:
@cursor push 38c2e26957
Preview (38c2e26957)
diff --git a/src/test-utils/mock-pinecone.ts b/src/test-utils/mock-pinecone.ts
--- a/src/test-utils/mock-pinecone.ts
+++ b/src/test-utils/mock-pinecone.ts
@@ -1,13 +1,13 @@
import {vi} from 'vitest';
-export function createMockNamespace() {
+export function createMockNamespace(): any {
return {
searchRecords: vi.fn(),
upsertRecords: vi.fn(),
};
}
-export function createMockIndex() {
+export function createMockIndex(): any {
const mockNamespace = createMockNamespace();
return {
describeIndexStats: vi.fn(),
@@ -16,7 +16,7 @@
};
}
-export function createMockPinecone() {
+export function createMockPinecone(): any {
const mockIndex = createMockIndex();
return {
listIndexes: vi.fn(),
diff --git a/src/tools/database/search-records.ts b/src/tools/database/search-records.ts
--- a/src/tools/database/search-records.ts
+++ b/src/tools/database/search-records.ts
@@ -57,6 +57,12 @@
namespace: z.string().describe('The namespace to search.'),
query: SEARCH_QUERY_SCHEMA,
rerank: RERANK_SCHEMA,
+ selectedMetadataKeys: z
+ .array(z.string())
+ .optional()
+ .describe(
+ 'Optional: List of metadata keys to return. If omitted, all metadata is returned (Potential PII risk).',
+ ),
};
type SearchArgs = {
@@ -64,7 +70,15 @@
namespace: string;
query: SearchQuery;
rerank?: SearchRerank;
+ selectedMetadataKeys?: string[];
};
+type SearchRecord = {
+ metadata?: Record<string, unknown>;
+} & Record<string, unknown>;
+type SearchResults = {
+ records?: SearchRecord[];
+ matches?: SearchRecord[];
+} & Record<string, unknown>;
export function addSearchRecordsTool(server: McpServer) {
registerDatabaseTool(
@@ -72,10 +86,31 @@
'search-records',
{description: INSTRUCTIONS, inputSchema: SCHEMA},
async (args, pc) => {
- const {name, namespace, query, rerank} = args as SearchArgs;
+ const {name, namespace, query, rerank, selectedMetadataKeys} = args as SearchArgs;
try {
const ns = pc.index(name).namespace(namespace);
- const results = await ns.searchRecords({query, rerank});
+ const results = (await ns.searchRecords({query, rerank})) as SearchResults;
+
+ // --- SECURITY PATCH: Metadata Filtering
+ if (selectedMetadataKeys) {
+ const filterRecordMetadata = (recordArray: SearchRecord[]) =>
+ recordArray.map((record) => {
+ if (!record.metadata) return record;
+ const filteredMetadata: Record<string, unknown> = {};
+ selectedMetadataKeys.forEach((key) => {
+ if (record.metadata[key] !== undefined)
+ filteredMetadata[key] = record.metadata[key];
+ });
+ return {...record, metadata: filteredMetadata};
+ });
+
+ if (Array.isArray(results.records)) {
+ results.records = filterRecordMetadata(results.records);
+ }
+ if (Array.isArray(results.matches)) {
+ results.matches = filterRecordMetadata(results.matches);
+ }
+ }
return {
content: [
{
diff --git a/src/tools/database/security.test.ts b/src/tools/database/security.test.ts
new file mode 100644
--- /dev/null
+++ b/src/tools/database/security.test.ts
@@ -1,0 +1,109 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+// 1. HOISTED MOCK: This must be at the very top to intercept the tools' imports
+vi.mock('./common/pinecone-client.js', () => ({
+ getPineconeClient: vi.fn(),
+}));
+
+// Now import the tools and utilities
+import { createMockServer } from '../../test-utils/mock-server.js';
+import { createMockPinecone } from '../../test-utils/mock-pinecone.js';
+import { getPineconeClient } from './common/pinecone-client.js';
+import { addSearchRecordsTool } from './search-records.js';
+import { addUpsertRecordsTool } from './upsert-records.js';
+
+describe('Pinecone MCP Security Hardening', () => {
+ let mockServer: any;
+ let mockPc: any;
+
+ beforeEach(() => {
+ vi.clearAllMocks();
+ mockServer = createMockServer();
+ mockPc = createMockPinecone();
+
+ // 2. Force the client factory to return our mock for every call
+ vi.mocked(getPineconeClient).mockReturnValue(mockPc);
+
+ addSearchRecordsTool(mockServer);
+ addUpsertRecordsTool(mockServer);
+
+ // Satisfy the initial constant check
+ vi.stubEnv('PINECONE_API_KEY', 'test-key');
+ });
+
+ describe('Metadata Firewall (search-records)', () => {
+ it('should strip unauthorized metadata keys from search results', async () => {
+ const mockRecords = {
+ records: [{
+ id: 'vec-1',
+ metadata: { text: 'public', ssn: 'secret-123' }
+ }]
+ };
+
+ mockPc._mockIndex._mockNamespace.searchRecords.mockResolvedValue(mockRecords);
+
+ const tool = mockServer.getRegisteredTool('search-records');
+ const result = await tool.handler({
+ name: 'idx', namespace: 'ns',
+ query: { inputs: { text: 'test' }, topK: 1 },
+ selectedMetadataKeys: ['text']
+ });
+
+ // Helpful debugging: if it fails, show why
+ if (result.isError) {
+ throw new Error(`Tool returned error: ${result.content[0].text}`);
+ }
+
+ const output = JSON.parse(result.content[0].text);
+ expect(output.records[0].metadata.ssn).toBeUndefined();
+ expect(output.records[0].metadata.text).toBe('public');
+ });
+
+ it('should return all metadata if selectedMetadataKeys is omitted', async () => {
+ mockPc._mockIndex._mockNamespace.searchRecords.mockResolvedValue({
+ records: [{ id: '1', metadata: { secret: 'data' } }]
+ });
+
+ const tool = mockServer.getRegisteredTool('search-records');
+ const result = await tool.handler({
+ name: 'idx', namespace: 'ns',
+ query: { inputs: { text: 'test' }, topK: 1 }
+ });
+
+ if (result.isError) {
+ throw new Error(`Tool returned error: ${result.content[0].text}`);
+ }
+
+ const output = JSON.parse(result.content[0].text);
+ expect(output.records[0].metadata.secret).toBe('data');
+ });
+ });
+
+ describe('Upsert Guardrail (upsert-records)', () => {
+ it('should block upsert if confirmOverwrite flag is missing', async () => {
+ const tool = mockServer.getRegisteredTool('upsert-records');
+ const result = await tool.handler({
+ name: 'idx', namespace: 'ns',
+ records: [{ id: '1', text: 'val' }]
+ });
+
+ expect(result.isError).toBe(true);
+ expect(result.content[0].text).toContain('confirmOverwrite must be set to true');
+ });
+
+ it('should execute upsert when confirmOverwrite is true', async () => {
+ mockPc._mockIndex._mockNamespace.upsertRecords.mockResolvedValue(undefined);
+ const tool = mockServer.getRegisteredTool('upsert-records');
+
+ const result = await tool.handler({
+ name: 'idx', namespace: 'ns',
+ records: [{ id: '1', text: 'val' }],
+ confirmOverwrite: true
+ });
+
+ // Final check: ensure no error was returned
+ expect(result.isError).toBeFalsy();
+ expect(result.content[0].text).toBe('Data upserted successfully');
+ });
+ });
+});
\ No newline at end of file
diff --git a/src/tools/database/upsert-records.handler.test.ts b/src/tools/database/upsert-records.handler.test.ts
--- a/src/tools/database/upsert-records.handler.test.ts
+++ b/src/tools/database/upsert-records.handler.test.ts
@@ -42,6 +42,7 @@
name: 'test-index',
namespace: 'test-ns',
records: [{id: '1', content: 'test content'}],
+ confirmOverwrite: true,
});
expect(mockPc.index).toHaveBeenCalledWith('test-index');
@@ -63,6 +64,7 @@
name: 'test-index',
namespace: 'test-ns',
records: [{id: '1', content: 'test content'}],
+ confirmOverwrite: true,
});
expect(result).toEqual({
@@ -80,6 +82,7 @@
name: 'test-index',
namespace: 'test-ns',
records: [{_id: 'alt-1', content: 'test content'}],
+ confirmOverwrite: true,
})) as {content: Array<{text: string}>};
expect(mockPc._mockIndex._mockNamespace.upsertRecords).toHaveBeenCalledWith([
@@ -87,4 +90,24 @@
]);
expect(result.content[0].text).toContain('successfully');
});
-});
+
+ it('fails if confirmOverwrite is missing or false', async () => {
+ // 🛡️ BULLETPROOF SCOPING: Register the tool explicitly for this test
+ // This bypasses any nested 'beforeEach' boundary issues
+ addUpsertRecordsTool(mockServer);
+
+ // Now we are 100% guaranteed the tool exists
+ const tool = mockServer.getRegisteredTool('upsert-records')!;
+
+ const result = (await tool.handler({
+ name: 'test-index',
+ namespace: 'test-ns',
+ records: [{id: '1', content: 'test content'}]
+ })) as any;
+
+ expect(result.isError).toBe(true);
+ expect(result.content[0].text).toContain('confirmOverwrite must be set to true');
+ expect(mockPc._mockIndex._mockNamespace.upsertRecords).not.toHaveBeenCalled();
+ });
+
+});
\ No newline at end of file
diff --git a/src/tools/database/upsert-records.ts b/src/tools/database/upsert-records.ts
--- a/src/tools/database/upsert-records.ts
+++ b/src/tools/database/upsert-records.ts
@@ -35,12 +35,18 @@
name: z.string().describe('The index to upsert into.'),
namespace: z.string().describe('The namespace to upsert into.'),
records: RECORD_SET_SCHEMA,
+ confirmOverwrite: z
+ .boolean()
+ .describe(
+ 'CRITICAL: Set to true to acknowledge that this will overwrite any existing records with the same IDs.',
+ ),
};
type UpsertArgs = {
name: string;
namespace: string;
records: IntegratedRecord[];
+ confirmOverwrite: boolean;
};
export function addUpsertRecordsTool(server: McpServer) {
@@ -49,7 +55,20 @@
'upsert-records',
{description: INSTRUCTIONS, inputSchema: SCHEMA},
async (args, pc) => {
- const {name, namespace, records} = args as UpsertArgs;
+ const {name, namespace, records, confirmOverwrite} = args as UpsertArgs;
+
+ // --- SECURITY PATCH: Destructive Action Guardrail ---
+ if (confirmOverwrite !== true) {
+ return {
+ isError: true,
+ content: [
+ {
+ type: 'text' as const,
+ text: 'Error: confirmOverwrite must be set to true. Upserting records can overwrite existing data. Please ask the user for confirmation.',
+ },
+ ],
+ };
+ }
try {
const ns = pc.index(name).namespace(namespace);
await ns.upsertRecords(records);This Bugbot Autofix run was free. To enable autofix for future PRs, go to the Cursor dashboard.
| } | ||
|
|
||
| export function createMockPinecone() { | ||
| export function createMockPinecone(): any { |
There was a problem hiding this comment.
Explicit any return types erase mock type aliases
Low Severity
Annotating createMockNamespace, createMockIndex, and createMockPinecone with explicit : any return types causes the downstream type aliases MockPinecone, MockIndex, and MockNamespace (defined via ReturnType<...>) to silently collapse to any. All test files importing these types lose type-checking on mock property access, defeating the purpose of the typed mock utilities.
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 1 potential issue.
There are 2 total unresolved issues (including 1 from previous review).
Bugbot Autofix is OFF. To automatically fix reported issues with cloud agents, enable autofix in the Cursor dashboard.



Overview
This PR introduces critical safety and privacy enhancements for the Pinecone MCP server, specifically targeting common vulnerabilities in autonomous agent workflows: Metadata Exfiltration and Silent Data Overwrites.
By moving these checks into the protocol layer (Zod schemas and MCP handlers), we ensure that any LLM or agent consuming this server adheres to a "Least Privilege" and "Human-in-the-Loop" security posture.
Key Changes
1. The Metadata Firewall (
search-records)search-recordstool returned the completemetadataobject for every vector match. In RAG systems, metadata often contains sensitive PII, internal notes, or full document text not meant for raw LLM ingestion.selectedMetadataKeys: string[]parameter to the tool schema. The handler now intercepts the Pinecone SDK response and strictly filters the metadata object to only include the requested keys. If omitted, it defaults to returning all metadata to preserve legacy behavior, but allows agents to explicitly request only what they need.2. The Upsert Guardrail (
upsert-records)upsertoperation is destructive by default (overwriting existing vectors and metadata with matching IDs). An agent hallucinating an ID or executing a poorly scoped update could silently corrupt or poison the vector index.confirmOverwrite: booleanparameter to the schema. The handler short-circuits and returns a descriptive error if this flag is missing or false. This forces agents to proactively acknowledge the destructive nature of the action and (ideally) prompt the user for confirmation before proceeding.3. Security Evals & Test Hardening
src/tools/database/security.test.tssuite to verify both the firewall and the guardrail.upsert-records.handler.test.tsto reflect the new mandatory schema requirements, ensuring no future regressions bypass the confirmation flag.mock-pinecone.tstest utilities to support deeper integration testing.Testing Performed
security.test.tsconfirms PII keys are successfully stripped from search responses.security.test.tsconfirms SDK is completely bypassed ifconfirmOverwriteis missing.pnpm run build.Note
Medium Risk
Introduces a new required
confirmOverwriteargument forupsert-records, which is a behavior/interface change that can break existing callers. The newsearch-recordsmetadata filtering reduces PII exposure but relies on the Pinecone response shape (result.hits[].fields), so mismatches could cause unexpected output.Overview
Adds security guardrails to Pinecone database tools.
search-recordsnow accepts optionalselectedMetadataKeysand, when provided, filters each hit’s returned metadata (fromresult.hits[].fields) down to only those keys to reduce accidental PII exfiltration.upsert-recordsnow requires aconfirmOverwrite: trueflag and short-circuits with an error if missing/false to prevent silent destructive overwrites; tests and mock utilities were updated accordingly.Written by Cursor Bugbot for commit a56836a. This will update automatically on new commits. Configure here.