Skip to content

Commit 7fa71a4

Browse files
authored
Wasm tests: add typed-funcref test showing example of desirable optimizations. (#8158)
* Wasm tests: add typed-funcref test showing example of desirable optimizations. In order to have fast IC (inline cache) chains in AOT-compiled dynamic language Wasms, it would be great if we could make the "call to a typed funcref at a constant table index" pattern fast. This use-case was discussed at the most recent Wasmtime biweekly and @jameysharp is working on some optimizations; the intent of this PR is to provide a concrete test-case whose blessed output we can see improve over time. In particular, the following opts are still desirable: - With the use of non-nullable typed funcrefs, there shouldn't be a null check (there currently is, as noted by a comment in the code due to lack of type information at the right spot). - With the use of a constant table size and a constant index to the `table.get`, we should be able to load from the table without a bounds-check or any Spectre masking. Other further optimizations for this pattern might be possible if we rearrange the table and function-reference data structures, and the lazy-initialization scheme thereof, but the above should be agnostic to that. * Add comments to clarify typed funcrefs usage.
1 parent a0505b1 commit 7fa71a4

1 file changed

Lines changed: 166 additions & 0 deletions

File tree

tests/disas/typed-funcrefs.wat

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
;;! target = "x86_64"
2+
;;! test = "optimize"
3+
;;! flags = [ "-Wfunction-references=y" ]
4+
5+
;; This test is meant to simulate how typed funcrefs in a table may be
6+
;; used for ICs (inline caches) in a Wasm module compiled from a dynamic
7+
;; language. In native JIT engines, IC chains have head pointers that
8+
;; are raw code pointers, and IC-using code can call through each with a few ops
9+
;; (load pointer, call indirect). We'd like similar efficiency by
10+
;; storing funcrefs for the first IC in each chain in a typed-funcref
11+
;; table.
12+
13+
(module
14+
;; Signature shared by every IC stub: four i32 arguments, one i32 result.
(type $ic-stub (func (param i32 i32 i32 i32) (result i32)))
15+
16+
;; This syntax declares a table that is exactly 100 elements, whose
17+
;; elements are non-nullable function references, and whose default
18+
;; value (needed because non-nullable) is a pointer to `$ic1`.
19+
(table $ic-sites 100 100 (ref $ic-stub) (ref.func $ic1)) ;; min = max = 100
20+
21+
;; Minimal IC stub: ignores its last three arguments and returns the
;; first one. It is the value every slot of `$ic-sites` is initialized to.
(func $ic1 (param i32 i32 i32 i32) (result i32)
22+
local.get 0)
23+
24+
;; Calls the stubs stored at constant table indices 1 and 2, forwarding
;; its own four arguments to each, and returns the sum of both results.
(func $call-ics (param i32 i32 i32 i32) (result i32)
25+
(local $sum i32) ;; accumulates the results of the stub calls
26+
27+
;; IC callsite index 1 (arbitrary).
28+
local.get 0
29+
local.get 1
30+
local.get 2
31+
local.get 3
32+
i32.const 1 ;; constant table index for this callsite
33+
table.get $ic-sites ;; push the typed funcref stored at that index
34+
call_ref $ic-stub ;; call it with the four arguments pushed above
35+
local.get $sum
36+
i32.add ;; call result + $sum
37+
local.set $sum
38+
39+
;; IC callsite index 2 (arbitrary).
40+
local.get 0
41+
local.get 1
42+
local.get 2
43+
local.get 3
44+
i32.const 2 ;; constant table index for this callsite
45+
table.get $ic-sites ;; push the typed funcref stored at that index
46+
call_ref $ic-stub ;; call it with the four arguments pushed above
47+
local.get $sum
48+
i32.add ;; call result + $sum
49+
local.set $sum
50+
51+
local.get $sum)) ;; return the accumulated sum
52+
;; function u0:0(i64 vmctx, i64, i32, i32, i32, i32) -> i32 fast {
53+
;; gv0 = vmctx
54+
;; gv1 = load.i64 notrap aligned readonly gv0+8
55+
;; gv2 = load.i64 notrap aligned gv1
56+
;; sig0 = (i64 vmctx, i32 uext, i32 uext) -> i32 uext system_v
57+
;; sig1 = (i64 vmctx, i32 uext) -> i32 uext system_v
58+
;; stack_limit = gv2
59+
;;
60+
;; block0(v0: i64, v1: i64, v2: i32, v3: i32, v4: i32, v5: i32):
61+
;; v6 -> v2
62+
;; @002c jump block1
63+
;;
64+
;; block1:
65+
;; @002c return v2
66+
;; }
67+
;;
68+
;; function u0:1(i64 vmctx, i64, i32, i32, i32, i32) -> i32 fast {
69+
;; gv0 = vmctx
70+
;; gv1 = load.i64 notrap aligned readonly gv0+8
71+
;; gv2 = load.i64 notrap aligned gv1
72+
;; gv3 = vmctx
73+
;; gv4 = load.i64 notrap aligned gv3+72
74+
;; sig0 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
75+
;; sig1 = (i64 vmctx, i64, i32, i32, i32, i32) -> i32 fast
76+
;; sig2 = (i64 vmctx, i32 uext, i32 uext) -> i32 uext system_v
77+
;; sig3 = (i64 vmctx, i32 uext) -> i32 uext system_v
78+
;; stack_limit = gv2
79+
;;
80+
;; block0(v0: i64, v1: i64, v2: i32, v3: i32, v4: i32, v5: i32):
81+
;; v21 -> v0
82+
;; v47 -> v0
83+
;; v56 -> v0
84+
;; v59 -> v0
85+
;; v30 -> v2
86+
;; v31 -> v3
87+
;; v32 -> v4
88+
;; v33 -> v5
89+
;; v62 = iconst.i8 0
90+
;; @003b brif v62, block6, block7 ; v62 = 0
91+
;;
92+
;; block6 cold:
93+
;; @003b trap table_oob
94+
;;
95+
;; block7:
96+
;; @003b v12 = load.i64 notrap aligned v0+72
97+
;; v79 = iconst.i8 0
98+
;; v70 = iconst.i64 8
99+
;; @003b v14 = iadd v12, v70 ; v70 = 8
100+
;; @003b v16 = select_spectre_guard v79, v12, v14 ; v79 = 0
101+
;; @003b v17 = load.i64 notrap aligned table v16
102+
;; v58 = iconst.i64 -2
103+
;; @003b v18 = band v17, v58 ; v58 = -2
104+
;; @003b brif v17, block3(v18), block2
105+
;;
106+
;; block2 cold:
107+
;; @004e v48 = load.i64 notrap aligned readonly v0+56
108+
;; @004e v49 = load.i64 notrap aligned readonly v48+72
109+
;; @002f v7 = iconst.i32 0
110+
;; v28 -> v7
111+
;; @0039 v8 = iconst.i32 1
112+
;; @003b v24 = call_indirect sig0, v49(v0, v7, v8) ; v7 = 0, v8 = 1
113+
;; @003b jump block3(v24)
114+
;;
115+
;; block3(v19: i64):
116+
;; @003d brif v19, block9, block8
117+
;;
118+
;; block8 cold:
119+
;; @003d trap null_reference
120+
;;
121+
;; block9:
122+
;; @003d v25 = load.i64 notrap aligned readonly v19+16
123+
;; @003d v26 = load.i64 notrap aligned readonly v19+32
124+
;; @003d v27 = call_indirect sig1, v25(v26, v0, v2, v3, v4, v5)
125+
;; v80 = iconst.i8 0
126+
;; @004e brif v80, block10, block11 ; v80 = 0
127+
;;
128+
;; block10 cold:
129+
;; @004e trap table_oob
130+
;;
131+
;; block11:
132+
;; @004e v38 = load.i64 notrap aligned v0+72
133+
;; v81 = iconst.i8 0
134+
;; v78 = iconst.i64 16
135+
;; @004e v40 = iadd v38, v78 ; v78 = 16
136+
;; @004e v42 = select_spectre_guard v81, v38, v40 ; v81 = 0
137+
;; @004e v43 = load.i64 notrap aligned table v42
138+
;; v82 = iconst.i64 -2
139+
;; v83 = band v43, v82 ; v82 = -2
140+
;; @004e brif v43, block5(v83), block4
141+
;;
142+
;; block4 cold:
143+
;; v84 = load.i64 notrap aligned readonly v0+56
144+
;; v85 = load.i64 notrap aligned readonly v84+72
145+
;; v86 = iconst.i32 0
146+
;; @004c v34 = iconst.i32 2
147+
;; @004e v50 = call_indirect sig0, v85(v0, v86, v34) ; v86 = 0, v34 = 2
148+
;; @004e jump block5(v50)
149+
;;
150+
;; block5(v45: i64):
151+
;; @0050 brif v45, block13, block12
152+
;;
153+
;; block12 cold:
154+
;; @0050 trap null_reference
155+
;;
156+
;; block13:
157+
;; @0050 v51 = load.i64 notrap aligned readonly v45+16
158+
;; @0050 v52 = load.i64 notrap aligned readonly v45+32
159+
;; @0050 v53 = call_indirect sig1, v51(v52, v0, v2, v3, v4, v5)
160+
;; @0059 jump block1
161+
;;
162+
;; block1:
163+
;; @0054 v55 = iadd.i32 v53, v27
164+
;; v6 -> v55
165+
;; @0059 return v55
166+
;; }

0 commit comments

Comments
 (0)