Skip to content

Commit 65e5f9f

Browse files
namedgraphclaude
andcommitted
Rework proxy Accept regression tests, simplify q-rank bypass
ProxyRequestFilter: - Drop the `quality(MediaType)` helper and the topQ/filter pipeline. Per the JAX-RS spec, `getAcceptableMediaTypes()` is sorted by q descending (Jersey: `HttpHeaderReader.readQualifiedList` → `AcceptableMediaType.COMPARATOR`), so the first non-wildcard type is the top-ranked one. Replace with a single loop that returns on (X)HTML and breaks otherwise. Tests: - Add `GET-proxied-accept-forwarded.sh` regressing 708edfc: send a single specific RDF type as `Accept` and assert the response `Content-Type` matches. Pre-fix, ProxyRequestFilter substituted its own readable-types list for the client's `Accept`, so upstream could pick any RDF format (e.g. rdf-thrift). - Rework `GET-proxied-accept-html-not-preferred.sh` to use HTTP status as the bypass-vs-forward discriminator. The previous content-type assertion was unreachable in a symmetric admin/end-user setup — both paths negotiate the same media type. A UUID-named non-existent path produces 200 on bypass (`ApplicationFilter` strips `?uri=` → admin root) and 404 on forward. - Silence `DEBUG:` echoes in `HEAD-proxied-etag.sh` to match the convention of the other proxy tests. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent aca9825 commit 65e5f9f

4 files changed

Lines changed: 65 additions & 49 deletions

File tree

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
5+
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
6+
purge_cache "$END_USER_VARNISH_SERVICE"
7+
purge_cache "$ADMIN_VARNISH_SERVICE"
8+
purge_cache "$FRONTEND_VARNISH_SERVICE"
9+
10+
# add agent to the readers group to be able to read documents
11+
12+
add-agent-to-group.sh \
13+
-f "$OWNER_CERT_FILE" \
14+
-p "$OWNER_CERT_PWD" \
15+
--agent "$AGENT_URI" \
16+
"${ADMIN_BASE_URL}acl/groups/readers/"
17+
18+
# Regression: ProxyRequestFilter must forward the client's Accept header verbatim to the
19+
# upstream, NOT substitute its own readable-types list. Previously the filter built its
20+
# outbound Accept from MediaTypes.getReadable(Model.class) + getReadable(ResultSet.class)
21+
# (everything Jena could ingest, all q=1.0), discarding what the client actually asked for.
22+
# The upstream then content-negotiated against that broad list and could legally pick any
23+
# RDF format — e.g. application/rdf+thrift — even when the client (e.g. SaxonJS document())
24+
# explicitly requested application/rdf+xml or application/xml.
25+
#
26+
# Verify by requesting one specific RDF type and asserting the response matches it.
27+
28+
for accept in 'application/rdf+xml' 'text/turtle' 'application/n-triples'; do
29+
content_type=$(curl -k -f -s -G -w "%{content_type}" -o /dev/null \
30+
-E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
31+
-H "Accept: $accept" \
32+
--data-urlencode "uri=${END_USER_BASE_URL}" \
33+
"$ADMIN_BASE_URL")
34+
35+
case "$content_type" in
36+
"$accept"*) ;;
37+
*) exit 1 ;;
38+
esac
39+
done

http-tests/proxy/GET-proxied-accept-html-not-preferred.sh

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,21 @@ add-agent-to-group.sh \
2020
# API-client intent and forward — not as browser navigation that wants the app shell.
2121
# Previously, ProxyRequestFilter bypassed on anyMatch(HTML or XHTML in Accept) without
2222
# checking q-rank, so it false-fired on any Accept that mentioned HTML at all and
23-
# returned the local XHTML shell instead of the proxied response.
23+
# returned the local app shell instead of the proxied response.
24+
#
25+
# Discriminator is HTTP status — content-type cannot tell bypass from forward because
26+
# admin and end-user share writer configs (same Accept → same negotiated type on both).
27+
# A UUID-named path that doesn't exist on either origin disambiguates:
28+
# - bypass: ApplicationFilter strips ?uri= → request URI becomes admin root → 200
29+
# - forward: proxy forwards the actual UUID path to end-user → 404
2430

2531
accept_header='application/xml, text/xml;q=0.9, application/xhtml+xml;q=0.8, */*;q=0.7'
32+
non_existing_uri="${END_USER_BASE_URL}$(cat /proc/sys/kernel/random/uuid 2>/dev/null || uuidgen)/"
2633

27-
content_type=$(curl -k -f -s -G -w "%{content_type}" -o /dev/null \
34+
status=$(curl -k -s -G -o /dev/null -w "%{http_code}" \
2835
-E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
2936
-H "Accept: $accept_header" \
30-
--data-urlencode "uri=${END_USER_BASE_URL}" \
31-
"$END_USER_BASE_URL")
37+
--data-urlencode "uri=${non_existing_uri}" \
38+
"$ADMIN_BASE_URL")
3239

33-
echo "DEBUG: Accept: $accept_header"
34-
echo "DEBUG: Content-Type: $content_type"
35-
echo "DEBUG: Expected: not application/xhtml+xml or text/html (proxy must forward, not return local app shell)"
36-
37-
# fail (exit 1) if the response is the local app shell instead of the proxied content
38-
echo "$content_type" | grep -qvE '^(application/xhtml\+xml|text/html)\b'
40+
[ "$status" = "$STATUS_NOT_FOUND" ] || exit 1

http-tests/proxy/HEAD-proxied-etag.sh

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,5 @@ proxied_etag=$(curl -G --head -k -f -s \
3838
"$ADMIN_BASE_URL" \
3939
| extract_etag)
4040

41-
if [ -z "$proxied_etag" ]; then
42-
echo "DEBUG: Expected ETag header on proxied response, got none"
43-
echo "DEBUG: Direct ETag: $direct_etag"
44-
exit 1
45-
fi
46-
47-
if [ "$proxied_etag" != "$direct_etag" ]; then
48-
echo "DEBUG: Proxied ETag does not match direct ETag"
49-
echo "DEBUG: Direct: $direct_etag"
50-
echo "DEBUG: Proxied: $proxied_etag"
51-
exit 1
52-
fi
41+
[ -n "$proxied_etag" ] || exit 1
42+
[ "$proxied_etag" = "$direct_etag" ] || exit 1

src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,15 @@ public void filter(ContainerRequestContext requestContext) throws IOException
139139
// lower q (e.g. SaxonJS document() sends application/xml q=1.0, application/xhtml+xml q=0.8)
140140
// must still reach the proxy.
141141
// (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
142-
// (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
143-
List<MediaType> nonWildcardAccepts = requestContext.getAcceptableMediaTypes().stream()
144-
.filter(mt -> !mt.isWildcardType() && !mt.isWildcardSubtype())
145-
.toList();
146-
double topQ = nonWildcardAccepts.stream().mapToDouble(ProxyRequestFilter::quality).max().orElse(-1.0);
147-
boolean clientAcceptsHtml = nonWildcardAccepts.stream()
148-
.filter(mt -> quality(mt) == topQ)
149-
.anyMatch(mt -> mt.isCompatible(MediaType.TEXT_HTML_TYPE) ||
150-
mt.isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE));
151-
if (clientAcceptsHtml) return;
142+
// (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector.
143+
// Per the JAX-RS spec, getAcceptableMediaTypes() is sorted by q descending, so the first
144+
// non-wildcard type is the top-ranked one.
145+
for (MediaType mt : requestContext.getAcceptableMediaTypes())
146+
{
147+
if (mt.isWildcardType() || mt.isWildcardSubtype()) continue;
148+
if (mt.isCompatible(MediaType.TEXT_HTML_TYPE) || mt.isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE)) return;
149+
break; // first non-wildcard wasn't (X)HTML — proceed with proxy
150+
}
152151

153152
// strip #fragment (servers do not receive fragment identifiers)
154153
if (targetURI.getFragment() != null)
@@ -212,7 +211,6 @@ else if (agentContext instanceof IDTokenSecurityContext idTokenSecurityContext)
212211
}
213212

214213
MediaType[] clientAcceptTypes = requestContext.getAcceptableMediaTypes().toArray(MediaType[]::new);
215-
216214
if (log.isDebugEnabled()) log.debug("Proxying {} {} → {}", requestContext.getMethod(), requestContext.getUriInfo().getRequestUri(), targetURI);
217215

218216
try
@@ -287,9 +285,9 @@ protected Response getResponse(Response clientResponse)
287285
clientResponse.bufferEntity();
288286
InputStream entity = clientResponse.readEntity(InputStream.class);
289287

290-
Response.ResponseBuilder rb = Response.status(clientResponse.getStatus())
291-
.type(clientResponse.getMediaType())
292-
.entity(entity);
288+
Response.ResponseBuilder rb = Response.status(clientResponse.getStatus()).
289+
type(clientResponse.getMediaType()).
290+
entity(entity);
293291

294292
// forward all Link headers from the external response so the client receives remote hypermedia
295293
// (e.g. sd:endpoint pointing to the remote SPARQL endpoint);
@@ -377,17 +375,4 @@ public Request getRequest()
377375
return request;
378376
}
379377

380-
/**
381-
* Returns the q-value of a media type. Defaults to 1.0 if absent or unparseable.
382-
*
383-
* @param mt media type
384-
* @return quality value in the range [0.0, 1.0]
385-
*/
386-
private static double quality(MediaType mt)
387-
{
388-
String q = mt.getParameters().get("q");
389-
if (q == null) return 1.0;
390-
try { return Double.parseDouble(q); } catch (NumberFormatException e) { return 1.0; }
391-
}
392-
393378
}

0 commit comments

Comments
 (0)