-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscholar_test.go
More file actions
348 lines (284 loc) · 11.1 KB
/
scholar_test.go
File metadata and controls
348 lines (284 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
package go_scholar
import (
"fmt"
"github.com/stretchr/testify/assert"
"io"
"net/http"
"os"
"strings"
"testing"
"time"
)
// MockHTTPClient implements the HTTPClient interface for testing. It never
// touches the network: responses are built from local fixture files.
type MockHTTPClient struct{}

// MockRateLimitHTTPClient implements the HTTPClient interface for testing
// rate limiting. It can answer its first request with HTTP 429 and behaves
// like MockHTTPClient for every other request.
type MockRateLimitHTTPClient struct {
	callCount       int  // number of times Do has been called
	shouldReturn429 bool // when true, the first Do call is answered with 429
}

// Do routes the request by URL substring:
//   - a profile query (contains "/citations?user=" and "&cstart=") is served
//     from sample_author_page.html;
//   - an article view (contains "view_citation") is served from
//     sample_article_page.html;
//   - anything else gets a 404 with an empty body.
//
// A fixture that cannot be read is reported as (nil, err).
func (m *MockHTTPClient) Do(req *http.Request) (*http.Response, error) {
	url := req.URL.String()
	switch {
	case strings.Contains(url, "/citations?user=") && strings.Contains(url, "&cstart="):
		return m.mockProfileResponse()
	case strings.Contains(url, "view_citation"):
		return m.mockArticleResponse()
	default:
		// Unknown URLs get an empty "not found" response.
		return &http.Response{
			StatusCode: 404,
			Body:       io.NopCloser(strings.NewReader("")),
		}, nil
	}
}

// fixtureResponse reads the file at path and wraps its contents in a 200
// response. A read error is returned as-is together with a nil response.
func (m *MockHTTPClient) fixtureResponse(path string) (*http.Response, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	return &http.Response{
		StatusCode: 200,
		Body:       io.NopCloser(strings.NewReader(string(content))),
	}, nil
}

// mockProfileResponse returns the author-page fixture as a 200 response.
func (m *MockHTTPClient) mockProfileResponse() (*http.Response, error) {
	return m.fixtureResponse("sample_author_page.html")
}

// mockArticleResponse returns the article-page fixture as a 200 response.
func (m *MockHTTPClient) mockArticleResponse() (*http.Response, error) {
	return m.fixtureResponse("sample_article_page.html")
}

// Do counts the call and, when shouldReturn429 is set, answers the very first
// call with an empty 429 response so retry logic can be exercised. Every other
// call is routed exactly as MockHTTPClient.Do routes it.
func (m *MockRateLimitHTTPClient) Do(req *http.Request) (*http.Response, error) {
	m.callCount++
	if m.shouldReturn429 && m.callCount == 1 {
		return &http.Response{
			StatusCode: 429,
			Status:     "Too Many Requests",
			Body:       io.NopCloser(strings.NewReader("")),
		}, nil
	}
	// Share the routing with MockHTTPClient so the two mocks cannot drift apart.
	var base MockHTTPClient
	return base.Do(req)
}

// mockProfileResponse returns the author-page fixture as a 200 response.
func (m *MockRateLimitHTTPClient) mockProfileResponse() (*http.Response, error) {
	var base MockHTTPClient
	return base.mockProfileResponse()
}

// mockArticleResponse returns the article-page fixture as a 200 response.
func (m *MockRateLimitHTTPClient) mockArticleResponse() (*http.Response, error) {
	var base MockHTTPClient
	return base.mockArticleResponse()
}
// TestArticleLimiting checks that QueryProfile returns exactly as many
// articles as the requested limit, for several limits, using the mock client.
func TestArticleLimiting(t *testing.T) {
	scholar := New("profiles.json", "articles.json")
	scholar.SetHTTPClient(&MockHTTPClient{})
	scholar.SetRequestDelay(1 * time.Millisecond) // keep throttling negligible in tests

	for _, want := range []int{1, 2, 5, 10} {
		name := fmt.Sprintf("Limit_%d", want)
		t.Run(name, func(t *testing.T) {
			got, err := scholar.QueryProfile("SbUmSEAAAAAJ", want)
			assert.NoError(t, err)
			assert.Len(t, got, want, "Should return exactly %d articles", want)

			// Basic sanity check: every returned article carries a title.
			for idx, entry := range got {
				assert.NotEmpty(t, entry.Title, "Article %d should have a title", idx+1)
			}
		})
	}
}
// TestGetArticles is a smoke test: an instance returned by New accepts a short
// request delay and the mock HTTP client. No query is issued here, so no
// request is sent at all.
func TestGetArticles(t *testing.T) {
	scholar := New("profiles.json", "articles.json")

	// Test configuration: minimal throttling and the mock transport.
	scholar.SetRequestDelay(1 * time.Millisecond)
	scholar.SetHTTPClient(&MockHTTPClient{})

	assert.NotNil(t, scholar)
}
func TestScholarQuerier(t *testing.T) {
// Test basic Scholar creation
sch := New("profiles.json", "articles.json")
assert.NotNil(t, sch)
}
// TestMockHTTPClient checks MockHTTPClient's URL routing: profile and article
// URLs must yield 200, and any other URL must yield 404.
//
// Each step is guarded so that a failure (for example a missing fixture file,
// which makes Do return a nil response) is reported as a test failure rather
// than a nil-pointer panic. Response bodies are closed.
func TestMockHTTPClient(t *testing.T) {
	mock := &MockHTTPClient{}

	cases := []struct {
		name       string
		url        string
		wantStatus int
	}{
		{
			name:       "profile",
			url:        "https://scholar.google.com/citations?user=SbUmSEAAAAAJ&cstart=0&pagesize=1",
			wantStatus: 200,
		},
		{
			name:       "article",
			url:        "https://scholar.google.com/citations?view_op=view_citation&hl=en&user=SbUmSEAAAAAJ",
			wantStatus: 200,
		},
		{
			name:       "unknown",
			url:        "https://example.com",
			wantStatus: 404,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			req, err := http.NewRequest("GET", tc.url, nil)
			if !assert.NoError(t, err) {
				return
			}

			resp, err := mock.Do(req)
			// Stop on failure: resp is nil when Do returns an error, and
			// reading resp.StatusCode would panic.
			if !assert.NoError(t, err) || !assert.NotNil(t, resp) {
				return
			}
			defer resp.Body.Close()

			assert.Equal(t, tc.wantStatus, resp.StatusCode)
		})
	}
}
// TestProfileQuerier runs a single-article profile query through the mock
// HTTP client and expects a non-empty result without error.
func TestProfileQuerier(t *testing.T) {
	sch := New("profiles.json", "articles.json")
	// Use a tiny delay so the test stays fast.
	sch.SetRequestDelay(1 * time.Millisecond)
	// The mock client keeps the test off the real network.
	sch.SetHTTPClient(&MockHTTPClient{})

	articles, err := sch.QueryProfile("SbUmSEAAAAAJ", 1)
	assert.NoError(t, err)
	assert.NotEmpty(t, articles)

	// Log through the test framework instead of stdout: the output is then
	// attributed to this test and only shown with -v or on failure.
	for _, article := range articles {
		t.Log(article)
	}
}
// TestThrottling issues three consecutive profile queries with a 10ms request
// delay and expects the whole run to take at least the two delays that
// separate three requests.
func TestThrottling(t *testing.T) {
	const (
		delay    = 10 * time.Millisecond
		requests = 3
		// Two gaps between three requests: 2 * 10ms = 20ms.
		minElapsed = (requests - 1) * delay
	)

	scholar := New("profiles.json", "articles.json")
	scholar.SetRequestDelay(delay)
	scholar.SetHTTPClient(&MockHTTPClient{})

	begin := time.Now()
	for n := 0; n < requests; n++ {
		_, err := scholar.QueryProfile("SbUmSEAAAAAJ", 1)
		assert.Nil(t, err)
	}
	elapsed := time.Since(begin)

	// Only a lower bound is asserted, so a slow machine cannot cause a false failure.
	assert.True(t, elapsed >= minElapsed, "Throttling should enforce delays between requests")
}
func TestRateLimitRetry(t *testing.T) {
sch := New("profiles.json", "articles.json")
// Set a very short delay for testing
sch.SetRequestDelay(1 * time.Millisecond)
mockClient := &MockRateLimitHTTPClient{shouldReturn429: true}
sch.SetHTTPClient(mockClient)
// This should succeed after the first 429 retry
// Use queryArticles=false to avoid making article queries which would increase call count
articles, err := sch.QueryProfileDumpResponse("SbUmSEAAAAAJ", false, 1, false)
assert.Nil(t, err)
assert.NotEmpty(t, articles)
// Should have made 2 calls (first 429, second success)
assert.Equal(t, 2, mockClient.callCount)
}
// TestRequestDelayConfiguration checks the request delay of a new instance
// (expected to be 2 seconds) and that SetRequestDelay replaces it.
func TestRequestDelayConfiguration(t *testing.T) {
	scholar := New("profiles.json", "articles.json")

	// A freshly constructed instance must carry the 2-second default.
	assert.Equal(t, 2*time.Second, scholar.requestDelay)

	// After the setter runs, the stored delay must be the custom value.
	const override = 500 * time.Millisecond
	scholar.SetRequestDelay(override)
	assert.Equal(t, override, scholar.requestDelay)
}
// MockAlwaysFailHTTPClient answers every request with HTTP 500. It simulates
// a server failure without triggering the 429 retry/backoff logic, which is
// tested separately in TestRateLimitRetry.
type MockAlwaysFailHTTPClient struct{}

// Do ignores req and always returns a 500 response with an empty body and a
// nil error.
func (m *MockAlwaysFailHTTPClient) Do(req *http.Request) (*http.Response, error) {
	failure := new(http.Response)
	failure.StatusCode = http.StatusInternalServerError
	failure.Status = "Internal Server Error"
	failure.Body = io.NopCloser(strings.NewReader(""))
	return failure, nil
}
// TestStaleCacheFallback checks that when the cached profile is expired and
// the refresh request fails, the stale cached articles are returned without
// an error.
func TestStaleCacheFallback(t *testing.T) {
	sch := New("profiles.json", "articles.json")
	sch.SetRequestDelay(1 * time.Millisecond)
	sch.SetHTTPClient(&MockHTTPClient{})

	// First query populates the cache.
	articles, err := sch.QueryProfileWithMemoryCache("SbUmSEAAAAAJ", 10)
	assert.NoError(t, err)
	assert.NotEmpty(t, articles)
	originalCount := len(articles)

	// Expire the cached profile by storing it back with an old timestamp.
	// The second result of Load is deliberately dropped: the checked type
	// assertion below also fails for a missing entry, and it turns what used
	// to be a panic into an ordinary test failure.
	cached, _ := sch.profile.Load("SbUmSEAAAAAJ")
	profile, ok := cached.(Profile)
	if !assert.True(t, ok, "profile cache should hold a Profile after the first query, got %T", cached) {
		return
	}
	profile.LastRetrieved = time.Now().Add(-8 * 24 * time.Hour) // 8 days ago (past 7-day expiry)
	sch.profile.Store("SbUmSEAAAAAJ", profile)

	// From now on every HTTP request fails with a 500.
	sch.SetHTTPClient(&MockAlwaysFailHTTPClient{})

	// The second query must fall back to the stale cache instead of erroring.
	articles, err = sch.QueryProfileWithMemoryCache("SbUmSEAAAAAJ", 10)
	assert.NoError(t, err, "Should not return error when stale cache is available")
	assert.Equal(t, originalCount, len(articles), "Should return same articles from stale cache")
}
// TestProfileRefreshPreservesArticles checks that after the cached profile
// expires and is refreshed, the query returns the same number of articles and
// each one still has its ScholarURL and Authors populated.
func TestProfileRefreshPreservesArticles(t *testing.T) {
	sch := New("profiles.json", "articles.json")
	sch.SetRequestDelay(1 * time.Millisecond)
	sch.SetHTTPClient(&MockHTTPClient{})

	// First query populates both profile and article caches
	// (queryArticles=true on a cache miss, per the original author's note).
	articles, err := sch.QueryProfileWithMemoryCache("SbUmSEAAAAAJ", 10)
	assert.NoError(t, err)
	assert.NotEmpty(t, articles)
	originalCount := len(articles)

	// The articles must carry full details before the refresh.
	for _, a := range articles {
		assert.NotEmpty(t, a.ScholarURL, "Article should have ScholarURL")
		assert.NotEmpty(t, a.Authors, "Article should have Authors from detail page")
	}

	// Expire the cached profile so the next call triggers a refresh.
	// The second result of Load is deliberately dropped: the checked type
	// assertion below also fails for a missing entry, and it turns what used
	// to be a panic into an ordinary test failure.
	cached, _ := sch.profile.Load("SbUmSEAAAAAJ")
	profile, ok := cached.(Profile)
	if !assert.True(t, ok, "profile cache should hold a Profile after the first query, got %T", cached) {
		return
	}
	profile.LastRetrieved = time.Now().Add(-8 * 24 * time.Hour) // 8 days ago
	sch.profile.Store("SbUmSEAAAAAJ", profile)

	// The second query should refresh the profile (queryArticles=false, per the
	// original author's note) and serve article details from the cache.
	articles2, err := sch.QueryProfileWithMemoryCache("SbUmSEAAAAAJ", 10)
	assert.NoError(t, err)
	assert.Equal(t, originalCount, len(articles2), "Should return same number of articles after profile refresh")

	// The articles must still carry full details after the refresh.
	for _, a := range articles2 {
		assert.NotEmpty(t, a.ScholarURL, "Article should still have ScholarURL after profile refresh")
		assert.NotEmpty(t, a.Authors, "Article should still have Authors from cache after profile refresh")
	}
}
// TestPaginationLogic requests more articles (100) than the sample page
// holds and expects all 58 sample articles back, each with a title.
func TestPaginationLogic(t *testing.T) {
	scholar := New("profiles.json", "articles.json")
	scholar.SetRequestDelay(1 * time.Millisecond)
	scholar.SetHTTPClient(&MockHTTPClient{})

	// The sample page holds 58 articles. Asking for 100 should make pagination
	// kick in, but the mock serves the same page every time, so the expected
	// total stays at 58.
	got, err := scholar.QueryProfileDumpResponse("SbUmSEAAAAAJ", false, 100, false)
	assert.NoError(t, err)
	assert.Equal(t, 58, len(got), "Should return all 58 articles from sample data")

	// Basic sanity check: every returned article carries a title.
	for idx, entry := range got {
		assert.NotEmpty(t, entry.Title, "Article %d should have a title", idx+1)
	}
}