Skip to content

Commit 932ba5f

Browse files
authored
[0035] Address several minor spec issues (#783)
Fixes #734 Fixes #780 Fixes #782
1 parent 690765a commit 932ba5f

1 file changed

Lines changed: 41 additions & 37 deletions

File tree

proposals/0035-linalg-matrix.md

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,8 +1066,12 @@ generated:
10661066
10671067
### DXIL Operations
10681068
1069+
A new overload shape `[MatTy]` is introduced in the signatures below. This
1070+
shall be the `<mangling>` part of `%dx.types.LinAlgMatrix<mangling>` preceded
1071+
by the letter `m`.
1072+
10691073
```llvm
1070-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.fillMatrix.[MatTy].[TY](
1074+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgFillMatrix.[MatTy].[TY](
10711075
immarg i32, ; opcode
10721076
[Ty] ; fill value
10731077
)
@@ -1078,20 +1082,20 @@ matrix component's type, a type conversion is applied following the rules
10781082
documented in the [Conversions](#conversions) section.
10791083

10801084
```llvm
1081-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.copyConvertMatrix.[MatTy1].[MatTy2](
1085+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgCopyConvertMatrix.[MatTy1].[MatTy2](
10821086
immarg i32, ; opcode
10831087
%dx.types.LinAlgMatrix<mangling>, ; matrix source
10841088
immarg i1, ; transpose
10851089
)
10861090
```
10871091

1088-
Converts the element and use type of the source matrix to the destination
1089-
matrix, and optionally transpose the matrix. The source matrix remains valid and
1090-
unmodified after this operation is applied. Validation shall enforce that both
1091-
matrices have the same scope and dimensions.
1092+
Returns a new matrix which is a copy of the source matrix where the element and
1093+
use type of the returned matrix have been converted to `MatTy1` from `MatTy2`, optionally transposed as selected by the `transpose` operand.
1094+
The source matrix remains valid and unmodified after this operation is applied.
1095+
Validation shall enforce that both matrices have the same scope and dimensions.
10921096

10931097
```llvm
1094-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.matrixLoadFromDescriptor.[MatTy](
1098+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgMatrixLoadFromDescriptor.[MatTy](
10951099
immarg i32, ; opcode
10961100
%dx.types.Handle, ; ByteAddressBuffer
10971101
i32, ; Offset
@@ -1112,7 +1116,7 @@ Validation rules will enforce that:
11121116
* `Stride` is `0` if the `Layout` is not `RowMajor` or `ColMajor`
11131117

11141118
```llvm
1115-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.matrixLoadFromMemory.[MatTy].[Ty](
1119+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgMatrixLoadFromMemory.[MatTy].[Ty](
11161120
immarg i32, ; opcode
11171121
[Ty] * addrspace(4), ; groupshared T[M * N]
11181122
i32, ; Offset
@@ -1126,7 +1130,7 @@ between opaque matrices and groupshared memory are defined in the
11261130
[Conversions](#conversions) section below.
11271131

11281132
```llvm
1129-
declare i32 @dx.op.matrixLength.[MatTy](
1133+
declare i32 @dx.op.linAlgMatrixLength.[MatTy](
11301134
immarg i32, ; opcode
11311135
%dx.types.LinAlgMatrix<mangling> ; matrix
11321136
)
@@ -1136,7 +1140,7 @@ Returns the number of elements stored in thread-local storage on the active
11361140
thread for the provided matrix.
11371141

11381142
```llvm
1139-
declare <2 x i32> @dx.op.matrixGetCoordinate.[MatTy](
1143+
declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.[MatTy](
11401144
immarg i32, ; opcode
11411145
%dx.types.LinAlgMatrix<mangling>, ; matrix
11421146
i32 ; thread-local index
@@ -1147,7 +1151,7 @@ Returns a two element vector containing the column and row of the matrix that
11471151
the thread-local index corresponds to.
11481152

11491153
```llvm
1150-
declare [Ty] @dx.op.matrixGetElement.[Ty].[MatTy](
1154+
declare [Ty] @dx.op.linAlgMatrixGetElement.[Ty].[MatTy](
11511155
immarg i32, ; opcode
11521156
%dx.types.LinAlgMatrix<mangling>, ; matrix
11531157
i32 ; thread-local index
@@ -1159,7 +1163,7 @@ If the index is out of range for the values stored in this thread the result is
11591163
0.
11601164

11611165
```llvm
1162-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.matrixSetElement.[MatTy].[MatTy].[Ty](
1166+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgMatrixSetElement.[MatTy].[MatTy].[Ty](
11631167
immarg i32, ; opcode
11641168
%dx.types.LinAlgMatrix<mangling>, ; input matrix
11651169
i32, ; thread-local index
@@ -1172,7 +1176,7 @@ to the value provided. If the index is out of range for the values stored in
11721176
this thread the result is a no-op.
11731177

11741178
```llvm
1175-
declare void @dx.op.matrixStoreToDescriptor.[MatTy](
1179+
declare void @dx.op.linAlgMatrixStoreToDescriptor.[MatTy](
11761180
immarg i32, ; opcode
11771181
%dx.types.LinAlgMatrix<mangling>, ; matrix
11781182
%dx.types.Handle, ; ByteAddressBuffer
@@ -1190,7 +1194,7 @@ Validation rules will enforce that:
11901194
* `Layout` is `RowMajor` or `ColMajor`
11911195

11921196
```llvm
1193-
declare void @dx.op.matrixStoreToMemory.[MatTy].[Ty](
1197+
declare void @dx.op.linAlgMatrixStoreToMemory.[MatTy].[Ty](
11941198
immarg i32, ; opcode
11951199
%dx.types.LinAlgMatrix<mangling>, ; matrix
11961200
[Ty] *, ; groupshared T[M * N]
@@ -1208,7 +1212,7 @@ The validator will ensure that the group shared target memory is large enough
12081212
for the write.
12091213

12101214
```llvm
1211-
declare i32 @dx.op.matrixQueryAccumulatorLayout(
1215+
declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(
12121216
immarg i32, ; opcode
12131217
)
12141218
```
@@ -1220,7 +1224,7 @@ layout while a return value of `1` will denote that accumulator matrices are `B`
12201224
layout.
12211225

12221226
```llvm
1223-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.matrixMulOp.[MatTyC].[MatTyA].[MatTyB](
1227+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgMatrixMulOp.[MatTyC].[MatTyA].[MatTyB](
12241228
immarg i32, ; opcode
12251229
%dx.types.LinAlgMatrix<mangling>, ; matrix A
12261230
%dx.types.LinAlgMatrix<mangling> ; matrix B
@@ -1245,7 +1249,7 @@ Must be called from wave-uniform control flow.
12451249

12461250

12471251
```llvm
1248-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.matrixAccumulate.[MatTyC].[MatTyLHS].[MatTyRHS](
1252+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgMatrixAccumulate.[MatTyC].[MatTyLHS].[MatTyRHS](
12491253
immarg i32, ; opcode
12501254
%dx.types.LinAlgMatrix<mangling>, ; matrix LHS
12511255
%dx.types.LinAlgMatrix<mangling>, ; matrix RHS
@@ -1266,7 +1270,7 @@ Validation rules will enforce that:
12661270
Must be called from wave-uniform control flow.
12671271

12681272
``` llvm
1269-
declare <[NUMo] x [TYo]> @dx.op.matvecmul.v[NUMo][TYo].[MatTy].v[NUMi][TYi](
1273+
declare <[NUMo] x [TYo]> @dx.op.linAlgMatVecMul.v[NUMo][TYo].[MatTy].v[NUMi][TYi](
12701274
immarg i32, ; opcode
12711275
%dx.types.LinAlgMatrix<mangling>, ; matrix A
12721276
<[NUMi] x [TYi]>, ; input vector
@@ -1282,7 +1286,7 @@ Validation will enforce that:
12821286
* The matrix A is an `A` matrix of `Thread` scope
12831287

12841288
``` llvm
1285-
declare <[NUMo] x [TYo]> @dx.op.matvecmuladd.v[NUMo][TYo].[MatTy].v[NUMi][TYi].v[NUMo][TYb](
1289+
declare <[NUMo] x [TYo]> @dx.op.linAlgMatVecMulAdd.v[NUMo][TYo].[MatTy].v[NUMi][TYi].v[NUMo][TYb](
12861290
immarg i32, ; opcode
12871291
%dx.types.LinAlgMatrix<mangling>, ; matrix A
12881292
<[NUMi] x [TYi]>, ; input vector
@@ -1301,7 +1305,7 @@ Validation will enforce that:
13011305
* The matrix A is an `A` matrix of `Thread` scope
13021306

13031307
```llvm
1304-
declare void @dx.op.matrixAccumulateToDescriptor.[MatTy](
1308+
declare void @dx.op.linAlgMatrixAccumulateToDescriptor.[MatTy](
13051309
immarg i32, ; opcode
13061310
%dx.types.LinAlgMatrix<mangling>, ; matrix
13071311
%dx.types.Handle, ; RWByteAddressBuffer
@@ -1325,7 +1329,7 @@ Validation rules will enforce that:
13251329
* `Stride` is `0` if the `Layout` is not `RowMajor` or `ColMajor`
13261330

13271331
```llvm
1328-
declare void @dx.op.matrixAccumulateToMemory.[MatTy].p[Ty](
1332+
declare void @dx.op.linAlgMatrixAccumulateToMemory.[MatTy].p[Ty](
13291333
immarg i32, ; opcode
13301334
%dx.types.LinAlgMatrix<mangling>, ; matrix
13311335
[Ty] *, ; groupshared T[M * N]
@@ -1344,7 +1348,7 @@ The validator will ensure that the group shared target memory is large enough
13441348
for the write.
13451349

13461350
```llvm
1347-
declare %dx.types.LinAlgMatrix<mangling> @dx.op.matrixOuterProduct.[MatTy].v[M][TY].v[N][TY](
1351+
declare %dx.types.LinAlgMatrix<mangling> @dx.op.linAlgMatrixOuterProduct.[MatTy].v[M][TY].v[N][TY](
13481352
immarg i32, ; opcode
13491353
<[M] x [Ty]>, ; vector A
13501354
<[N] x [Ty]> ; vector B
@@ -1366,20 +1370,20 @@ Validation will ensure that:
13661370

13671371
#### Bounds Checking Behavior
13681372

1369-
The `@dx.op.matrixLoadFromDescriptor` operation loads data from a descriptor.
1370-
For load operations a default element value of zero casted to the element type
1371-
is substituted for out of bounds reads. An implementation may either perform
1372-
bounds checking on the full bounds of the load initializing the full matrix to
1373-
the default element value if any element is out of bounds, or it may perform
1374-
per-element bounds checking initializing only the out of bounds elements to the
1375-
default value.
1376-
1377-
The `@dx.op.matrixStoreToDescriptor` and `@dx.op.matrixAccumulateToDescriptor`
1378-
operations write data to a descriptor. Writes to out of bounds memory are a
1379-
no-op. An implementation may either perform bounds checking on the full bounds
1380-
of the store converting the whole store to a no-op if any elelemt is out of
1381-
bounds, or it may perform per-element bounds checking only converting the out of
1382-
bounds stores to no-ops.
1373+
The `@dx.op.linAlgMatrixLoadFromDescriptor` operation loads data from a
1374+
descriptor. For load operations a default element value of zero cast to the
1375+
element type is substituted for out of bounds reads. An implementation may
1376+
either perform bounds checking on the full bounds of the load initializing the
1377+
full matrix to the default element value if any element is out of bounds, or it
1378+
may perform per-element bounds checking initializing only the out of bounds
1379+
elements to the default value.
1380+
1381+
The `@dx.op.linAlgMatrixStoreToDescriptor` and
1382+
`@dx.op.linAlgMatrixAccumulateToDescriptor` operations write data to a
1383+
descriptor. Writes to out of bounds memory are a no-op. An implementation may
1384+
either perform bounds checking on the full bounds of the store converting the
1385+
whole store to a no-op if any element is out of bounds, or it may perform
1386+
per-element bounds checking only converting the out of bounds stores to no-ops.
13831387

13841388
> Note: bounds checking is not required for reads and writes to root descriptors
13851389
> as D3D does not attach dimensions to root descriptors.
@@ -1406,7 +1410,7 @@ struct MatrixUse {
14061410
```
14071411

14081412
This object will encode each matrix shape and element type as used by the DXIL
1409-
operations in the `matrixMulOp` and `matvecmuladd` opcode classes.
1413+
operations in the `linAlgMatrixMulOp` and `linAlgMatVecMulAdd` opcode classes.
14101414

14111415
The Scope field will encode one of the values defined in the [`DXILMatrixScope`
14121416
enumeration](#dxil-enumerations).

0 commit comments

Comments
 (0)