Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 3rdparty/aiter
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably don't need path splitting there either, it should work with DAITER_EMBEDDED_HSA_HEADER=\"{header_path_var}\"'
Also, I wonder whether it can be overridden without changing the AITER scripts, e.g. by using HIPCC_COMPILE_FLAGS_APPEND or something similar.

39 changes: 37 additions & 2 deletions transformer_engine/common/ck_fused_attn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,50 @@ else()

# If not downloaded, Fallback: Build from source
if(NOT AITER_PREBUILT_DOWNLOAD_SUCCESS)
# Generate the embedded HSA header and pass it into AITER compile.py via env vars.
# This avoids needing to set AITER_ASM_DIR (thread-unsafe).
set(AITER_HSA_DIR "${__AITER_SOURCE_DIR}/hsa")
set(AITER_EMBEDDED_HSA_HEADER_PATH "${CMAKE_CURRENT_BINARY_DIR}/aiter_embedded_hsa/aiter_embedded_hsa.h")

set(AITER_EMBEDDED_HSA_SUBDIRS)
foreach(ARCH IN LISTS V3_ASM_ARCHS)
list(APPEND AITER_EMBEDDED_HSA_SUBDIRS
"${ARCH}/fmha_v3_bwd"
"${ARCH}/fmha_v3_fwd")
endforeach()

set(AITER_BUILD_ENV_ARGS)
if(AITER_EMBEDDED_HSA_SUBDIRS)
execute_process(
COMMAND python3 "${CMAKE_CURRENT_LIST_DIR}/generate_aiter_embedded_hsa.py"
--hsa-dir ${AITER_HSA_DIR}
--output ${AITER_EMBEDDED_HSA_HEADER_PATH}
--subdirs ${AITER_EMBEDDED_HSA_SUBDIRS}
RESULT_VARIABLE AITER_MAKE_HSA_RET
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: You can use COMMAND_ERROR_IS_FATAL ANY instead, so that CMake itself fails when the command fails.

)
if(AITER_MAKE_HSA_RET AND NOT AITER_MAKE_HSA_RET EQUAL 0)
message(FATAL_ERROR "Failed to generate aiter_embedded_hsa.h")
endif()
list(APPEND AITER_BUILD_ENV_ARGS AITER_EMBEDDED_HSA_HEADER_PATH=${AITER_EMBEDDED_HSA_HEADER_PATH})
else()
message(STATUS "[AITER-BUILD] No supported V3 ASM arch selected; skipping embedded HSA generation.")
endif()

message(STATUS " [AITER-BUILD] Building aiter from source.")
execute_process(
COMMAND bash ${CMAKE_CURRENT_LIST_DIR}/aiter_build.sh
COMMAND ${CMAKE_COMMAND} -E env
${AITER_BUILD_ENV_ARGS}
bash ${CMAKE_CURRENT_LIST_DIR}/aiter_build.sh
--aiter-dir ${__AITER_SOURCE_DIR}
--aiter-test-dir ${__AITER_TEST_DIR}
--gpu-archs "${V3_ASM_ARCHS_STR}"
--ck-tile-bf16 ${CK_FUSED_ATTN_FLOAT_TO_BFLOAT16_DEFAULT}
RESULT_VARIABLE AITER_BUILD_RET
)
# libmha_fwd.so and libmha_bwd.so will be under 3rdparty/aiter/op_tests/cpp/mha
if(AITER_BUILD_RET AND NOT AITER_BUILD_RET EQUAL 0)
message(FATAL_ERROR "[AITER-BUILD] aiter_build.sh failed")
endif()
# libmha_fwd.a and libmha_bwd.a will be under 3rdparty/aiter/op_tests/cpp/mha
cache_local_aiter_build(${__AITER_TEST_DIR})
endif()
endif()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add "this file was modified for rocm Transformer Engine", like

# This file was modified for portability to AMDGPU

Ilya has more experience in the copyright area. Can you comment on this, @ipanfilo? Thanks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At a minimum, it should say where it was copied from and refer to PyTorch for the original file's license.

#
# See LICENSE for license information.
# Generate aiter_embedded_hsa.h with embedded binary .co files for AITER HSA kernels.

import argparse
import re
import sys
from pathlib import Path


def sanitize_identifier(name: str) -> str:
    """Turn a path-like string into text that is safe inside a C++ identifier.

    Every character that is not an ASCII letter or an ASCII digit is replaced
    by an underscore; all other characters are kept as they are.

    Args:
        name: The string to convert (for example a relative file path).

    Returns:
        The converted string, with the same length as ``name``.
    """
    # ASCII characters for which isalnum() holds are exactly [a-zA-Z0-9].
    return "".join(
        ch if (ch.isascii() and ch.isalnum()) else "_" for ch in name
    )


def bytes_to_hex_array(data: bytes, bytes_per_line: int = 16) -> str:
    """Render ``data`` as the comma-separated body of a C array initializer.

    Each byte becomes a two-digit lowercase ``0x..`` literal. Literals are
    separated by commas, and a line break is inserted before every byte whose
    index is a non-zero multiple of ``bytes_per_line``.

    Args:
        data: The raw bytes to render.
        bytes_per_line: How many byte literals go on one output line.

    Returns:
        The formatted text; an empty string when ``data`` is empty.
    """
    pieces = []
    for position, value in enumerate(data):
        if position == 0:
            # The first literal never has a separator in front of it.
            lead = ""
        elif position % bytes_per_line == 0:
            # Close the previous literal with a comma, then start a new line.
            lead = "," + "\n "
        else:
            lead = ","
        pieces.append(lead + f"0x{value:02x}")
    return "".join(pieces)


def generate_embedded_hsa_header(
    hsa_dir: Path, output_file: Path, subdirs: list[str]
) -> int:
    """
    Generate a C++ header file embedding all .co files from specified subdirectories.

    Each non-empty .co file becomes one ``unsigned char`` array in namespace
    ``aiter_hsa``; a map named ``embedded_hsa_map`` associates the key
    ``hsa/<path relative to hsa_dir>`` with a ``std::string_view`` over that array.

    A file that is reachable through more than one entry of ``subdirs`` (for
    example when one listed subdirectory contains another) is embedded only once.
    If two different files would produce the same array name after
    sanitization, later ones get a numeric suffix so the header stays compilable.

    Args:
        hsa_dir: Base directory containing hsa files (e.g., third_party/aiter/hsa)
        output_file: Path to the output header file; missing parent directories
            are created.
        subdirs: List of subdirectories to scan for .co files
            (e.g., ["gfx942/fmha_v3_bwd", "gfx950/fmha_v3_bwd"]). Subdirectories
            that do not exist are skipped.

    Returns:
        Number of .co files embedded. When no .co file is found at all, a
        warning is printed to stderr, no header is written, and 0 is returned.
    """
    # Collect all .co files, keyed by map key so that overlapping subdirectories
    # cannot contribute the same file twice. Insertion order is preserved.
    co_files: dict[str, Path] = {}
    for subdir in subdirs:
        pattern_dir = hsa_dir / subdir
        if not pattern_dir.exists():
            continue
        for co_file in sorted(pattern_dir.glob("**/*.co")):
            # Key format: hsa/gfx942/fmha_v3_bwd/xxx.co
            # Use as_posix() to ensure forward slashes on all platforms
            rel_path = co_file.relative_to(hsa_dir).as_posix()
            co_files.setdefault(f"hsa/{rel_path}", co_file)

    if not co_files:
        # Diagnostics go to stderr, matching the error reporting in main().
        print(
            f"Warning: No .co files found in {hsa_dir} under {subdirs}",
            file=sys.stderr,
        )
        return 0

    # Generate header content
    # Using std::string_view instead of std::span<const unsigned char> for C++17 compatibility
    # std::string_view provides .data() method which is what hipModuleLoadData needs
    lines = [
        "// Auto-generated file. Do not edit.",
        "// Embedded AITER HSA binary files for fmha_v3_bwd",
        "#pragma once",
        "",
        "#include <cstdint>",
        "#include <string>",
        "#include <string_view>",
        "#include <unordered_map>",
        "",
        "// Define AITER_EMBEDDED_HSA_MAP macro so that aiter_hip_common.h",
        "// can detect the embedded map is available via #if defined(AITER_EMBEDDED_HSA_MAP)",
        "#define AITER_EMBEDDED_HSA_MAP ::aiter_hsa::embedded_hsa_map",
        "",
        "namespace aiter_hsa {",
        "",
    ]

    # Generate array declarations and map entries
    array_entries = []
    used_names: set[str] = set()
    for map_key, co_file in co_files.items():
        with open(co_file, "rb") as f:
            data = f.read()

        # Only generate array and map entry if file has content
        if len(data) == 0:
            continue

        safe_name = sanitize_identifier(co_file.relative_to(hsa_dir).as_posix())
        base_name = f"data_{safe_name}"
        # Distinct paths can sanitize to the same identifier (e.g. "a-b.co"
        # and "a_b.co"); disambiguate instead of emitting a redefinition.
        array_name = base_name
        suffix = 1
        while array_name in used_names:
            array_name = f"{base_name}_{suffix}"
            suffix += 1
        used_names.add(array_name)

        file_size = len(data)
        array_entries.append((map_key, array_name, file_size))

        hex_array = bytes_to_hex_array(data)
        lines.append(
            f"alignas(4096) inline const unsigned char {array_name}[] = {{\n {hex_array}\n}};"
        )
        lines.append("")

    # Generate the map
    lines.append(
        "inline const std::unordered_map<std::string, std::string_view> embedded_hsa_map = {"
    )
    for map_key, array_name, file_size in array_entries:
        lines.append(
            f' {{"{map_key}", std::string_view(reinterpret_cast<const char*>({array_name}), {file_size})}},'
        )
    lines.append("};")
    lines.append("")
    lines.append("} // namespace aiter_hsa")
    lines.append("")

    # Write output
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w") as f:
        f.write("\n".join(lines))

    return len(array_entries)


def main():
    """Command-line entry point: parse arguments and generate the header.

    Reads ``--hsa-dir``, ``--output`` and ``--subdirs`` from the command line
    and calls ``generate_embedded_hsa_header`` with them.

    Returns:
        0 after the generator has run, or 1 when the directory given by
        ``--hsa-dir`` does not exist (an error message is printed to stderr).
    """
    cli = argparse.ArgumentParser(
        description="Generate aiter_embedded_hsa.h with embedded binary .co files"
    )
    # The two path options share the same shape: required, parsed as Path.
    required_paths = (
        ("--hsa-dir", "Path to the aiter hsa directory"),
        ("--output", "Path to the output header file"),
    )
    for flag, description in required_paths:
        cli.add_argument(flag, required=True, type=Path, help=description)
    cli.add_argument(
        "--subdirs",
        nargs="+",
        default=["gfx942/fmha_v3_bwd", "gfx950/fmha_v3_bwd"],
        help="Subdirectories to scan for .co files",
    )

    opts = cli.parse_args()

    if opts.hsa_dir.exists():
        embedded = generate_embedded_hsa_header(
            opts.hsa_dir, opts.output, opts.subdirs
        )
        print(f"Generated {opts.output} with {embedded} embedded .co files")
        return 0

    print(f"Error: HSA directory does not exist: {opts.hsa_dir}", file=sys.stderr)
    return 1


# Run main() only when this file is executed as a script, and use its return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())