1919#define DLPACK_MAJOR_VERSION 1
2020
2121/* ! \brief The current minor version of dlpack */
22- #define DLPACK_MINOR_VERSION 0
22+ #define DLPACK_MINOR_VERSION 1
2323
2424/* ! \brief DLPACK_DLL prefix for windows */
2525#ifdef _WIN32
@@ -157,6 +157,26 @@ typedef enum {
157157 kDLComplex = 5U ,
158158 /* ! \brief boolean */
159159 kDLBool = 6U ,
160+ /* ! \brief FP8 data types */
161+ kDLFloat8_e3m4 = 7U ,
162+ kDLFloat8_e4m3 = 8U ,
163+ kDLFloat8_e4m3b11fnuz = 9U ,
164+ kDLFloat8_e4m3fn = 10U ,
165+ kDLFloat8_e4m3fnuz = 11U ,
166+ kDLFloat8_e5m2 = 12U ,
167+ kDLFloat8_e5m2fnuz = 13U ,
168+ kDLFloat8_e8m0fnu = 14U ,
169+ /* ! \brief FP6 data types
170+ * Behavior for bits != 6 is currently unspecified: the producer must ensure bits is set to 6,
171+ * while the consumer must stop importing if the value is unexpected.
172+ */
173+ kDLFloat6_e2m3fn = 15U ,
174+ kDLFloat6_e3m2fn = 16U ,
175+ /* ! \brief FP4 data types
176+ * Behavior for bits != 4 is currently unspecified: the producer must ensure bits is set to 4,
177+ * while the consumer must stop importing if the value is unexpected.
178+ */
179+ kDLFloat4_e2m1fn = 17U ,
160180} DLDataTypeCode;
161181
162182/* !
@@ -170,6 +190,12 @@ typedef enum {
170190 * - int8: type_code = 0, bits = 8, lanes = 1
171191 * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
172192 * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
193+ * - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory)
194+ * - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory)
195+ * - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory)
196+ *
197+ * When a sub-byte type is packed, DLPack requires the data to be in little bit-endian, i.e.,
198+ * for a packed data set D, ((D >> (i * bits)) & bit_mask) stores the i-th element.
173199 */
174200typedef struct {
175201 /* !
@@ -196,8 +222,8 @@ typedef struct {
196222 * types. This pointer is always aligned to 256 bytes as in CUDA. The
197223 * `byte_offset` field should be used to point to the beginning of the data.
198224 *
199- * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
200- * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
225+ * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
226+ * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
201227 * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
202228 * (after which this note will be updated); at the moment it is recommended
203229 * to not rely on the data pointer being correctly aligned.
@@ -267,7 +293,7 @@ typedef struct DLManagedTensor {
267293 void (*deleter)(struct DLManagedTensor * self);
268294} DLManagedTensor;
269295
270- // bit masks used in in the DLManagedTensorVersioned
296+ // bit masks used in the DLManagedTensorVersioned
271297
272298/* ! \brief bit mask to indicate that the tensor is read only. */
273299#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL )
@@ -280,6 +306,14 @@ typedef struct DLManagedTensor {
280306 */
281307#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL )
282308
309+ /* !
310+ * \brief bit mask to indicate whether a sub-byte type is packed or padded.
311+ *
312+ * The default for sub-byte types (ex: fp4/fp6) is assumed packed. This flag can
313+ * be set by the producer to signal that a tensor of sub-byte type is padded.
314+ */
315+ #define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL )
316+
283317/* !
284318 * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
285319 *
0 commit comments