gogpu · kolkov · Mar 15, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
@@ -7,6 +7,60 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.21.0] - 2026-03-15
+
+### Added
+
+- **public API: complete three-layer WebGPU stack** — The root `wgpu` package now
+  provides a full typed API for GPU programming. All operations go through
+  wgpu (public) → wgpu/core (validation) → wgpu/hal (backend). Consumers never
+  need to import `wgpu/hal` for standard use.
+
+- **public API: SetLogger / Logger** — `wgpu.SetLogger()` and `wgpu.Logger()`
+  propagate the logger to the entire stack (API, core, HAL backends).
+
+- **public API: Fence and async submission** — `Fence` type, `Device.CreateFence()`,
+  `WaitForFence()`, `ResetFence()`, `GetFenceStatus()`, `FreeCommandBuffer()`.
+  `Queue.SubmitWithFence()` for non-blocking GPU submission with fence signaling.
+
+- **public API: Surface lifecycle** — `Surface.SetPrepareFrame()` for platform
+  HiDPI/DPI hooks. `Surface.DiscardTexture()` for canceled frames. `Surface.HAL()`
+  escape hatch. Delegates to `core.Surface` state machine.
+
+- **public API: CommandEncoder extensions** — `CopyTextureToBuffer()`,
+  `TransitionTextures()`, `DiscardEncoding()`. All use wgpu types (no hal in signatures).
+
+- **public API: HAL accessors** — `Device.HalDevice()`, `Device.HalQueue()`,
+  `Texture.HalTexture()`, `TextureView.HalTextureView()` for advanced interop.
+
+- **public API: proper type definitions** — Replaced hal type aliases with proper
+  structs: `Extent3D`, `Origin3D`, `ImageDataLayout`, `DepthStencilState`,
+  `StencilFaceState`, `TextureBarrier`, `TextureRange`, `TextureUsageTransition`,
+  `BufferTextureCopy`. Unexported `toHAL()` converters. No hal leakage in godoc.
+
+- **core: complete resource types (CORE-001)** — All 12 stub resource types
+  (Texture, Sampler, BindGroupLayout, PipelineLayout, BindGroup, ShaderModule,
+  RenderPipeline, ComputePipeline, CommandEncoder, CommandBuffer, QuerySet, Surface)
+  now have full struct definitions with HAL handle wrapping.
+
+- **core: Surface state machine (CORE-002)** — Unconfigured → Configured → Acquired
+  lifecycle with PrepareFrameFunc hook and auto-reconfigure on dimension changes.
+
+- **core: CommandEncoder state machine (CORE-003)** — Recording/InRenderPass/
+  InComputePass/Finished/Error states with validated transitions.
+
+- **core: resource accessors (CORE-004)** — Read-only accessors and idempotent
+  Destroy() for all resource types.
+
+- **cmd/wgpu-triangle** — Single-threaded wgpu API triangle example.
+
+- **cmd/wgpu-triangle-mt** — Multi-threaded wgpu API triangle example.
+
+### Changed
+
+- **Updated naga v0.14.6 → v0.14.7** — Fixes MSL sequential per-type binding
+  indices across bind groups.
+
 ## [0.20.2] - 2026-03-12
 
 ### Fixed

@@ -19,19 +19,31 @@
 
 ---
 
-## Current State: v0.18.1
+## Current State: v0.21.0
 
 ✅ **All 5 HAL backends complete** (~80K LOC, ~100K total)
-✅ **Public API root package** — `import "github.com/gogpu/wgpu"`
-
-**New in v0.18.1:**
-- Vulkan: fix buffer-to-image copy row stride corruption — use format's block copy size instead of inferring from padded `BytesPerRow / Width` (gogpu#96)
-
-**New in v0.18.0:**
-- Public API root package with 20 user-facing types wrapping core/ and hal/
+✅ **Three-layer WebGPU stack** — wgpu API → wgpu/core → wgpu/hal
+✅ **Complete public API** — consumers never import `wgpu/hal`
+
+**New in v0.21.0:**
+- Complete three-layer architecture: public API → core validation → HAL backends
+- core: Surface lifecycle state machine, CommandEncoder state machine, 12 resource types
+- Proper type definitions (no hal aliases in godoc): Extent3D, DepthStencilState, TextureBarrier, etc.
+- Fence + async submission (SubmitWithFence), Surface PrepareFrame hook
+- SetLogger/Logger for stack-wide logging propagation
+- naga v0.14.7 (MSL binding index fix)
+
+**New in v0.20.2:**
+- Vulkan: validate WSI query functions in LoadInstance (prevents nil pointer SIGSEGV)
+
+**New in v0.20.1:**
+- Metal: fix missing stencil attachment in render pass (macOS rendering fix)
+- Metal: fix missing setClearDepth: call
+
+**New in v0.20.0:**
+- Public API root package with typed wrappers for core/ and hal/
 - WebGPU-spec-aligned flow: `CreateInstance()` → `RequestAdapter()` → `RequestDevice()`
 - Synchronous `Queue.Submit()` with internal fence management
-- Type aliases from `gputypes` — no extra imports needed
 - Deterministic `Release()` cleanup on all resource types
 
 **New in v0.16.17:**

@@ -0,0 +1,269 @@
+//go:build windows
+
+// Command wgpu-triangle-mt tests the wgpu public API rendering pipeline.
+// It is multi-threaded: the main thread handles window events while a
+// dedicated render thread performs all GPU operations.
+// This is the same architecture as the gogpu renderer.
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"runtime"
+	"time"
+
+	"github.com/gogpu/gputypes"
+	"github.com/gogpu/wgpu"
+	_ "github.com/gogpu/wgpu/hal/vulkan"
+	"github.com/gogpu/wgpu/internal/thread"
+)
+
+// Window parameters passed to NewWindow in run.
+const (
+	windowWidth  = 800
+	windowHeight = 600
+	windowTitle  = "wgpu API Triangle Test (Multi-Thread)"
+)
+
+// init pins the initial goroutine to its current OS thread before main runs.
+// NOTE(review): presumably required so that window creation and event polling
+// in run stay on the process's main thread — confirm against the window code.
+func init() {
+	runtime.LockOSThread()
+}
+
+// main executes run and, if it reports an error, prints it to stderr with a
+// "FATAL: " prefix and exits with status 1.
+func main() {
+	err := run()
+	if err == nil {
+		return
+	}
+
+	fmt.Fprintf(os.Stderr, "FATAL: %v\n", err)
+	os.Exit(1)
+}
+
+// run creates the window on the calling thread, starts a render loop thread,
+// initializes all GPU objects on that render thread, and then renders one
+// frame per successful window.PollEvents call until PollEvents returns false.
+//
+// It returns an error if the window or any GPU object cannot be created.
+// Per-frame failures are logged and the loop moves on to the next frame.
+//
+//nolint:gocognit,gocyclo,cyclop,funlen // example code — intentionally sequential
+func run() error {
+	log.Println("=== wgpu Multi-Thread Triangle Test ===")
+
+	// 1. Window (main thread)
+	window, err := NewWindow(windowTitle, windowWidth, windowHeight)
+	if err != nil {
+		return fmt.Errorf("window: %w", err)
+	}
+	defer window.Destroy()
+	log.Println("1. Window created")
+
+	// 2. Render thread
+	renderLoop := thread.NewRenderLoop()
+	defer renderLoop.Stop()
+	log.Println("2. Render thread created")
+
+	// 3-9. Init GPU on render thread
+	var (
+		instance       *wgpu.Instance
+		surface        *wgpu.Surface
+		device         *wgpu.Device
+		pipeline       *wgpu.RenderPipeline
+		pipelineLayout *wgpu.PipelineLayout
+		shader         *wgpu.ShaderModule
+		initErr        error
+	)
+
+	// NOTE(review): the results are read immediately after this call, so
+	// RunOnRenderThreadVoid is presumably synchronous — confirm.
+	renderLoop.RunOnRenderThreadVoid(func() {
+		// Closure-local error so the render thread never writes to the
+		// err variable owned by the enclosing function.
+		var err error
+
+		instance, err = wgpu.CreateInstance(&wgpu.InstanceDescriptor{
+			Backends: gputypes.BackendsVulkan,
+		})
+		if err != nil {
+			initErr = fmt.Errorf("instance: %w", err)
+			return
+		}
+
+		surface, err = instance.CreateSurface(0, window.Handle())
+		if err != nil {
+			initErr = fmt.Errorf("surface: %w", err)
+			return
+		}
+
+		adapter, err := instance.RequestAdapter(nil)
+		if err != nil {
+			initErr = fmt.Errorf("adapter: %w", err)
+			return
+		}
+		log.Printf("   Adapter: %s", adapter.Info().Name)
+
+		device, err = adapter.RequestDevice(nil)
+		if err != nil {
+			initErr = fmt.Errorf("device: %w", err)
+			return
+		}
+
+		w, h := window.Size()
+		err = surface.Configure(device, &wgpu.SurfaceConfiguration{
+			Format:      gputypes.TextureFormatBGRA8Unorm,
+			Usage:       gputypes.TextureUsageRenderAttachment,
+			Width:       safeUint32(w),
+			Height:      safeUint32(h),
+			PresentMode: gputypes.PresentModeFifo,
+			AlphaMode:   gputypes.CompositeAlphaModeOpaque,
+		})
+		if err != nil {
+			initErr = fmt.Errorf("configure: %w", err)
+			return
+		}
+
+		shader, err = device.CreateShaderModule(&wgpu.ShaderModuleDescriptor{
+			Label: "Triangle",
+			WGSL:  triangleShaderWGSL,
+		})
+		if err != nil {
+			initErr = fmt.Errorf("shader: %w", err)
+			return
+		}
+
+		pipelineLayout, err = device.CreatePipelineLayout(&wgpu.PipelineLayoutDescriptor{
+			Label: "Triangle Layout",
+		})
+		if err != nil {
+			initErr = fmt.Errorf("layout: %w", err)
+			return
+		}
+
+		// The color target format must match the format the surface was
+		// configured with above.
+		pipeline, err = device.CreateRenderPipeline(&wgpu.RenderPipelineDescriptor{
+			Label:  "Triangle Pipeline",
+			Layout: pipelineLayout,
+			Vertex: wgpu.VertexState{
+				Module:     shader,
+				EntryPoint: "vs_main",
+			},
+			Fragment: &wgpu.FragmentState{
+				Module:     shader,
+				EntryPoint: "fs_main",
+				Targets: []gputypes.ColorTargetState{{
+					Format:    gputypes.TextureFormatBGRA8Unorm,
+					WriteMask: gputypes.ColorWriteMaskAll,
+				}},
+			},
+		})
+		if err != nil {
+			initErr = fmt.Errorf("pipeline: %w", err)
+			return
+		}
+
+		log.Println("3-9. GPU initialized on render thread")
+	})
+
+	if initErr != nil {
+		return initErr
+	}
+
+	// 10. Render loop
+	log.Println("=== Render loop started ===")
+	frameCount := 0
+	startTime := time.Now()
+
+	for window.PollEvents() {
+		var frameErr error
+
+		renderLoop.RunOnRenderThreadVoid(func() {
+			frameErr = renderFrame(surface, device, pipeline)
+		})
+
+		if frameErr != nil {
+			log.Printf("Frame error: %v", frameErr)
+			continue
+		}
+
+		frameCount++
+		if frameCount%60 == 0 {
+			// Average FPS since the loop started, not an instantaneous value.
+			fps := float64(frameCount) / time.Since(startTime).Seconds()
+			log.Printf("Frame %d (%.1f FPS)", frameCount, fps)
+		}
+	}
+
+	// Cleanup on render thread
+	// NOTE(review): device and instance are not released here — confirm
+	// whether they need an explicit release as well.
+	renderLoop.RunOnRenderThreadVoid(func() {
+		pipeline.Release()
+		pipelineLayout.Release()
+		shader.Release()
+		surface.Unconfigure()
+		surface.Release()
+	})
+
+	log.Printf("Done. %d frames", frameCount)
+	return nil
+}
+
+// renderFrame renders and presents a single frame: it acquires the current
+// surface texture, records one render pass that clears the target and draws
+// three vertices with pipeline, submits the commands, and presents.
+//
+// If any step up to and including Submit fails, the acquired surface texture
+// is discarded so it is not left acquired, and Present is not attempted.
+// The texture view is released on every path. The returned error is wrapped
+// with the name of the failing step.
+func renderFrame(surface *wgpu.Surface, device *wgpu.Device, pipeline *wgpu.RenderPipeline) error {
+	// Acquire. The second result is intentionally ignored in this example.
+	// NOTE(review): presumably a status/suboptimal flag — confirm.
+	surfaceTex, _, err := surface.GetCurrentTexture()
+	if err != nil {
+		return fmt.Errorf("GetCurrentTexture: %w", err)
+	}
+
+	// Until the commands are submitted, every early return must give the
+	// acquired texture back. Deferred calls run last-in-first-out, so the
+	// view registered below is released before the texture is discarded.
+	submitted := false
+	defer func() {
+		if !submitted {
+			surface.DiscardTexture()
+		}
+	}()
+
+	view, err := surfaceTex.CreateView(nil)
+	if err != nil {
+		return fmt.Errorf("CreateView: %w", err)
+	}
+	defer view.Release()
+
+	// Encode
+	encoder, err := device.CreateCommandEncoder(&wgpu.CommandEncoderDescriptor{Label: "Frame"})
+	if err != nil {
+		return fmt.Errorf("CreateCommandEncoder: %w", err)
+	}
+
+	renderPass, err := encoder.BeginRenderPass(&wgpu.RenderPassDescriptor{
+		ColorAttachments: []wgpu.RenderPassColorAttachment{{
+			View:       view,
+			LoadOp:     gputypes.LoadOpClear,
+			StoreOp:    gputypes.StoreOpStore,
+			ClearValue: gputypes.Color{R: 0, G: 0, B: 0.5, A: 1},
+		}},
+	})
+	if err != nil {
+		return fmt.Errorf("BeginRenderPass: %w", err)
+	}
+
+	renderPass.SetPipeline(pipeline)
+	renderPass.Draw(3, 1, 0, 0)
+	if err := renderPass.End(); err != nil {
+		return fmt.Errorf("end: %w", err)
+	}
+
+	commands, err := encoder.Finish()
+	if err != nil {
+		return fmt.Errorf("finish: %w", err)
+	}
+
+	// Submit. On failure nothing was rendered, so skip Present and let the
+	// deferred discard hand the texture back.
+	if err := device.Queue().Submit(commands); err != nil {
+		return fmt.Errorf("submit: %w", err)
+	}
+	submitted = true
+
+	if err := surface.Present(surfaceTex); err != nil {
+		return fmt.Errorf("present: %w", err)
+	}
+	return nil
+}
+
+// triangleShaderWGSL is the WGSL source handed to CreateShaderModule in run.
+// vs_main picks one of three hard-coded 2D positions by vertex_index and
+// returns it with z = 0 and w = 1; fs_main returns the constant color
+// (1, 0, 0, 1) for every fragment.
+const triangleShaderWGSL = `
+@vertex
+fn vs_main(@builtin(vertex_index) idx: u32) -> @builtin(position) vec4<f32> {
+    var positions = array<vec2<f32>, 3>(
+        vec2<f32>(0.0, 0.5),
+        vec2<f32>(-0.5, -0.5),
+        vec2<f32>(0.5, -0.5)
+    );
+    return vec4<f32>(positions[idx], 0.0, 1.0);
+}
+
+@fragment
+fn fs_main() -> @location(0) vec4<f32> {
+    return vec4<f32>(1.0, 0.0, 0.0, 1.0);
+}
+`
+
+// safeUint32 converts v to uint32, mapping every negative value to 0 so the
+// conversion can never wrap around to a huge unsigned number.
+func safeUint32(v int32) uint32 {
+	if v >= 0 {
+		return uint32(v)
+	}
+	return 0
+}