Conversation
Three experiments to diagnose PDF input failures via OpenRouter:
- pdf-vs-image: proves that the image_url format fails for PDFs, while file + plugin works
- pdf-message-shape-matrix: tests message shape (file-only vs. text + file) × format
- pdf-direct-input: compares PDF support across OpenAI/Anthropic/Google

Key finding: the 'file' content type with the file-parser plugin is the universal format. The AI SDK's image_url approach fails for OpenAI PDFs.
- Add a shared request-cache utility that caches API responses to disk
- Update fetch/pdf-direct-input to use caching
- Add ai-sdk-v5/pdf-openai-regression tests with caching
- The cache avoids hitting the OpenRouter API repeatedly during development
If response body parses as JSON, store as body.json object. Otherwise store as body.text string. Makes cache files readable.
|
Review the following changes in direct dependencies. Learn more about Socket for GitHub.
|
mohamdawad135-oss
left a comment
There was a problem hiding this comment.
// utils/request-cache.js
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';
/**
 * Disk-backed cache that stores one JSON file per key inside `cacheDir`.
 *
 * Each entry is written as an envelope `{ value, expiresAt }`. `get` returns
 * that envelope as-is and does not check expiry itself; callers compare
 * `expiresAt` (epoch milliseconds) against the current time.
 */
export class RequestCache {
  /**
   * @param {string} [cacheDir='./.cache'] Directory that holds the cache files.
   */
  constructor(cacheDir = './.cache') {
    this.cacheDir = cacheDir;
  }

  /**
   * Maps a cache key to the path of the file that stores it.
   *
   * The key is hashed so that arbitrary strings (URLs, serialized options)
   * become fixed-length, filesystem-safe file names.
   *
   * @param {string} key Cache key.
   * @returns {Promise<string>} Path of the form `<cacheDir>/<md5 hex>.json`.
   */
  async getKey(key) {
    const hash = crypto.createHash('md5').update(key).digest('hex');
    return path.join(this.cacheDir, `${hash}.json`);
  }

  /**
   * Reads the stored envelope for `key`.
   *
   * @param {string} key Cache key.
   * @returns {Promise<{value: *, expiresAt: number} | null>} The parsed file
   *   contents, or `null` when the entry cannot be read or parsed.
   */
  async get(key) {
    try {
      const filePath = await this.getKey(key);
      const data = await fs.readFile(filePath, 'utf-8');
      return JSON.parse(data);
    } catch {
      // Deliberate best-effort: any failure (missing file, unreadable file,
      // corrupt JSON) is reported to the caller as a cache miss.
      return null;
    }
  }

  /**
   * Writes `value` for `key`, creating the cache directory if needed.
   *
   * @param {string} key Cache key.
   * @param {*} value JSON-serializable value to store.
   * @param {number} [ttl=3600] Time to live in seconds.
   * @returns {Promise<void>}
   */
  async set(key, value, ttl = 3600) {
    await fs.mkdir(this.cacheDir, { recursive: true });
    const filePath = await this.getKey(key);
    const cacheData = {
      value,
      // ttl is in seconds; Date.now() is in milliseconds.
      expiresAt: Date.now() + ttl * 1000,
    };
    await fs.writeFile(filePath, JSON.stringify(cacheData));
  }

  /**
   * Deletes the whole cache directory; succeeds even if it does not exist.
   *
   * @returns {Promise<void>}
   */
  async clear() {
    await fs.rm(this.cacheDir, { recursive: true, force: true });
  }
}
// fetch/pdf-direct-input.js
import { RequestCache } from '../utils/request-cache.js';
const cache = new RequestCache();
/**
 * Fetches `url` and parses the body as JSON, serving repeat requests from the
 * module-level `cache` while the stored entry has not expired.
 *
 * The cache key combines the URL with the JSON-serialized `options`, so the
 * same URL with different request options is cached separately.
 *
 * @param {string} url Request URL.
 * @param {object} [options={}] Options forwarded to `fetch`.
 * @returns {Promise<*>} The parsed JSON body (fresh or cached).
 */
export async function fetchWithCache(url, options = {}) {
  const cacheKey = `fetch:${url}:${JSON.stringify(options)}`;

  // Try the cache first; the cache does not check expiry, so do it here.
  const cached = await cache.get(cacheKey);
  if (cached && cached.expiresAt > Date.now()) {
    console.log('📦 Using cached response for:', url);
    return cached.value;
  }

  // Cache miss or expired entry: go to the API.
  console.log('🌐 Fetching from API:', url);
  const response = await fetch(url, options);
  const data = await response.json();

  // Only cache successful responses, so an error body (rate limit, 5xx) is
  // not replayed for the whole TTL. The body is still returned to the caller.
  if (response.ok) {
    await cache.set(cacheKey, data, 3600); // valid for one hour
  }
  return data;
}
// tests/ai-sdk-v5/pdf-openai-regression.test.js
import { RequestCache } from '../../utils/request-cache.js';
import { processPDFWithAI } from '../pdf-processor.js';
// Regression suite: processing the same PDF twice with a shared RequestCache
// should yield the same result both times.
describe('PDF OpenAI Regression Tests with Caching', () => {
  let cache;

  beforeAll(() => {
    // Use a dedicated directory so test entries never mix with the main cache.
    cache = new RequestCache('./.test-cache');
  });

  afterAll(async () => {
    // Remove the on-disk test cache when the suite finishes.
    await cache.clear();
  });

  test('should cache PDF processing requests', async () => {
    const pdfUrl = 'https://example.com/test.pdf';

    // First call: expected to fetch from the API.
    const result1 = await processPDFWithAI(pdfUrl, { cache });
    expect(result1).toBeDefined();

    // Second call: expected to be served from the cache.
    // NOTE(review): this only asserts equal results; it does not prove the
    // second call skipped the API — confirm against processPDFWithAI.
    const result2 = await processPDFWithAI(pdfUrl, { cache });
    expect(result2).toEqual(result1);
  });
});
{
"scripts": {
"dev": "node --experimental-modules server.js",
"test": "jest --testPathPattern=pdf",
"clear-cache": "rm -rf .cache .test-cache",
"dev:cache": "CACHE_ENABLED=true npm run dev"
}
}
// config/cache-config.js
// Cache lifetimes per request category.
const cacheTtl = {
  pdf: 60 * 60, // one hour for PDF files
  api: 5 * 60, // five minutes for general requests
  test: 60, // one minute for tests
};

// Locations of the cache directories.
const cacheDirectories = {
  main: './.cache',
  test: './.test-cache',
};

/**
 * Cache settings: caching is enabled only when the CACHE_ENABLED environment
 * variable is exactly the string 'true'.
 */
export const cacheConfig = {
  enabled: process.env.CACHE_ENABLED === 'true',
  ttl: cacheTtl,
  directories: cacheDirectories,
};
No description provided.