leejet
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
Lines changed: 24 additions & 31 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 6 additions & 5 deletions
diff --git a/stable-diffusion.h b/include/stable-diffusion.h
rename from stable-diffusion.h
rename to include/stable-diffusion.h
diff --git a/face_detect.py b/script/face_detect.py
rename from face_detect.py
rename to script/face_detect.py
Lines changed: 87 additions & 87 deletions
diff --git a/cache_dit.hpp b/src/cache_dit.hpp
rename from cache_dit.hpp
rename to src/cache_dit.hpp
diff --git a/clip.hpp b/src/clip.hpp
rename from clip.hpp
rename to src/clip.hpp
diff --git a/common.hpp b/src/common.hpp
rename from common.hpp
rename to src/common.hpp
diff --git a/conditioner.hpp b/src/conditioner.hpp
rename from conditioner.hpp
rename to src/conditioner.hpp
diff --git a/control.hpp b/src/control.hpp
rename from control.hpp
rename to src/control.hpp
diff --git a/denoiser.hpp b/src/denoiser.hpp
rename from denoiser.hpp
rename to src/denoiser.hpp
@@ -535,31 +535,30 @@ jobs:
           # Add ROCm to PATH for current session
           echo "/opt/rocm/bin" >> $GITHUB_PATH
 
-          # Build case pattern from GPU_TARGETS
-          PATTERN=$(printf '%s' "$GPU_TARGETS" | sed 's/;/\*|\*/g')
-          PATTERN="*${PATTERN}*"
+          # Build an ERE alternation "(a|b|...)" from the ';'-separated GPU_TARGETS list; it is unanchored, so a target matches as a substring
+          TARGET_REGEX="($(printf '%s' "${{ env.GPU_TARGETS }}" | sed 's/;/|/g'))"
 
           # Remove library files for architectures we're not building for to save disk space
           echo "Cleaning up unneeded architecture files..."
           cd /opt/rocm/lib/rocblas/library
           # Keep only our target architectures
           for file in *; do
-            case "$file" in
-            $PATTERN)
-              ;;
-            *)
-              sudo rm -f "$file" ;;
-            esac;
+            if printf '%s' "$file" | grep -q 'gfx'; then  # only names containing "gfx" are removal candidates; anything else is kept
+              if ! printf '%s' "$file" | grep -Eq "$TARGET_REGEX"; then  # name contains none of the requested targets
+                echo "Removing $file" &&
+                sudo rm -f "$file";
+              fi
+            fi
           done
 
           cd /opt/rocm/lib/hipblaslt/library
           for file in *; do
-            case "$file" in
-              $PATTERN)
-                ;;
-              *)
-                sudo rm -f "$file" ;;
-            esac;
+            if printf '%s' "$file" | grep -q 'gfx'; then  # same filter as the rocblas loop: skip files without "gfx" in the name
+              if ! printf '%s' "$file" | grep -Eq "$TARGET_REGEX"; then  # name contains none of the requested targets
+                echo "Removing $file" &&
+                sudo rm -f "$file";
+              fi
+            fi
           done
 
       - name: Build
@@ -592,21 +591,15 @@ jobs:
           cp ggml/LICENSE ./build/bin/ggml.txt
           cp LICENSE ./build/bin/stable-diffusion.cpp.txt
 
-          # Create directories for ROCm libraries
-          mkdir -p ./build/bin/rocblas/library
-          mkdir -p ./build/bin/hipblaslt/library
-
-          # Copy ROCm runtime libraries (use || true to continue if files don't exist)
-          cp /opt/rocm/lib/librocsparse.so* ./build/bin/ || true
-          cp /opt/rocm/lib/libhsa-runtime64.so* ./build/bin/ || true
-          cp /opt/rocm/lib/libamdhip64.so* ./build/bin/ || true
-          cp /opt/rocm/lib/libhipblas.so* ./build/bin/ || true
-          cp /opt/rocm/lib/libhipblaslt.so* ./build/bin/ || true
-          cp /opt/rocm/lib/librocblas.so* ./build/bin/ || true
-
-          # Copy library files (already filtered to target architectures)
-          cp /opt/rocm/lib/rocblas/library/* ./build/bin/rocblas/library/ || true
-          cp /opt/rocm/lib/hipblaslt/library/* ./build/bin/hipblaslt/library/ || true
+          # Move (rather than copy) the ROCm runtime libraries into the package directory, to avoid double space consumption
+          sudo mv /opt/rocm/lib/librocsparse.so* ./build/bin/  # NOTE(review): no "|| true" any more, so a failing mv is not ignored - confirm every glob matches on the runner image
+          sudo mv /opt/rocm/lib/libhsa-runtime64.so* ./build/bin/
+          sudo mv /opt/rocm/lib/libamdhip64.so* ./build/bin/
+          sudo mv /opt/rocm/lib/libhipblas.so* ./build/bin/
+          sudo mv /opt/rocm/lib/libhipblaslt.so* ./build/bin/
+          sudo mv /opt/rocm/lib/librocblas.so* ./build/bin/
+          sudo mv /opt/rocm/lib/rocblas/ ./build/bin/  # whole directory is moved, so the separate mkdir + copy of library/ is no longer needed
+          sudo mv /opt/rocm/lib/hipblaslt/ ./build/bin/  # same for hipblaslt/
 
       - name: Fetch system info
         id: system-info
@@ -622,7 +615,7 @@ jobs:
         run: |
           cp ggml/LICENSE ./build/bin/ggml.txt
           cp LICENSE ./build/bin/stable-diffusion.cpp.txt
-          zip -j sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-Ubuntu-${{ env.UBUNTU_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-rocm.zip ./build/bin/*
+          zip -y -r sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-Ubuntu-${{ env.UBUNTU_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-rocm.zip ./build/bin  # -r: recurse into subdirectories; -y: store symlinks as links. NOTE(review): without -j, entries keep their build/bin/ path prefix - confirm consumers expect that layout
 
       - name: Upload artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 
@@ -87,9 +87,9 @@ endif()
 set(SD_LIB stable-diffusion)
 
 file(GLOB SD_LIB_SOURCES
-    "*.h"
-    "*.cpp"
-    "*.hpp"
+    "src/*.h"  # library sources are now collected from src/ instead of the repository root
+    "src/*.cpp"
+    "src/*.hpp"  # NOTE(review): GLOB is evaluated at configure time only; files added under src/ need a re-configure
 )
 
 find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
@@ -119,7 +119,7 @@ endif()
 message(STATUS "stable-diffusion.cpp commit ${SDCPP_BUILD_COMMIT}")
 
 set_property(
-  SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp
+  SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/version.cpp  # follows the source move to src/; the build commit/version definitions attach to this one file
   APPEND PROPERTY COMPILE_DEFINITIONS
   SDCPP_BUILD_COMMIT=${SDCPP_BUILD_COMMIT} SDCPP_BUILD_VERSION=${SDCPP_BUILD_VERSION}
 )
@@ -182,6 +182,7 @@ endif()
 add_subdirectory(thirdparty)
 
 target_link_libraries(${SD_LIB} PUBLIC ggml zip)
+target_include_directories(${SD_LIB} PUBLIC . include)  # include/ holds the public stable-diffusion.h; "." is listed again by the next call
 target_include_directories(${SD_LIB} PUBLIC . thirdparty)
 target_compile_features(${SD_LIB} PUBLIC c_std_11 cxx_std_17)
 
@@ -190,7 +191,7 @@ if (SD_BUILD_EXAMPLES)
     add_subdirectory(examples)
 endif()
 
-set(SD_PUBLIC_HEADERS stable-diffusion.h)
+set(SD_PUBLIC_HEADERS include/stable-diffusion.h)  # new location of the public header; consumed by the PUBLIC_HEADER property and install() below
 set_target_properties(${SD_LIB} PROPERTIES PUBLIC_HEADER "${SD_PUBLIC_HEADERS}")
 
 install(TARGETS ${SD_LIB} LIBRARY PUBLIC_HEADER)
@@ -1,88 +1,88 @@
-import os
-import sys
-
-import numpy as np
-import torch
-from diffusers.utils import load_image
-# pip install insightface==0.7.3
-from insightface.app import FaceAnalysis
-from insightface.data import get_image as ins_get_image
-from safetensors.torch import save_file
-
-### 
-# https://github.com/cubiq/ComfyUI_IPAdapter_plus/issues/165#issue-2055829543
-###
-class FaceAnalysis2(FaceAnalysis):
-    # NOTE: allows setting det_size for each detection call.
-    # the model allows it but the wrapping code from insightface
-    # doesn't show it, and people end up loading duplicate models
-    # for different sizes where there is absolutely no need to
-    def get(self, img, max_num=0, det_size=(640, 640)):
-        if det_size is not None:
-            self.det_model.input_size = det_size
-
-        return super().get(img, max_num)
-
-def analyze_faces(face_analysis: FaceAnalysis, img_data: np.ndarray, det_size=(640, 640)):
-    # NOTE: try detect faces, if no faces detected, lower det_size until it does
-    detection_sizes = [None] + [(size, size) for size in range(640, 256, -64)] + [(256, 256)]
-
-    for size in detection_sizes:
-        faces = face_analysis.get(img_data, det_size=size)
-        if len(faces) > 0:
-            return faces
-
-    return []
-
-if __name__ == "__main__":
-    #face_detector = FaceAnalysis2(providers=['CUDAExecutionProvider'], allowed_modules=['detection', 'recognition'])
-    face_detector = FaceAnalysis2(providers=['CPUExecutionProvider'], allowed_modules=['detection', 'recognition'])
-    face_detector.prepare(ctx_id=0, det_size=(640, 640))
-    #input_folder_name = './scarletthead_woman'
-    input_folder_name = sys.argv[1]
-    image_basename_list = os.listdir(input_folder_name)
-    image_path_list = sorted([os.path.join(input_folder_name, basename) for basename in image_basename_list])
-
-    input_id_images = []
-    for image_path in image_path_list:
-        input_id_images.append(load_image(image_path))
-    
-    id_embed_list = []
-    
-    for img in input_id_images:
-        img = np.array(img)
-        img = img[:, :, ::-1]
-        faces = analyze_faces(face_detector, img)
-        if len(faces) > 0:
-            id_embed_list.append(torch.from_numpy((faces[0]['embedding'])))
-    
-    if len(id_embed_list) == 0:
-        raise ValueError(f"No face detected in input image pool")
-    
-    id_embeds = torch.stack(id_embed_list)    
-    
-    # for r in id_embeds:
-    #     print(r)
-    # #torch.save(id_embeds, input_folder_name+'/id_embeds.pt');
-    # weights = dict()
-    # weights["id_embeds"] = id_embeds
-    # save_file(weights, input_folder_name+'/id_embeds.safetensors')
-
-    binary_data = id_embeds.numpy().tobytes()
-    two = 4
-    zero = 0
-    one = 1
-    tensor_name = "id_embeds"
-# Write binary data to a file
-    with open(input_folder_name+'/id_embeds.bin', "wb") as f:
-        f.write(two.to_bytes(4, byteorder='little'))
-        f.write((len(tensor_name)).to_bytes(4, byteorder='little'))
-        f.write(zero.to_bytes(4, byteorder='little'))
-        f.write((id_embeds.shape[1]).to_bytes(4, byteorder='little'))
-        f.write((id_embeds.shape[0]).to_bytes(4, byteorder='little'))
-        f.write(one.to_bytes(4, byteorder='little'))
-        f.write(one.to_bytes(4, byteorder='little'))
-        f.write(tensor_name.encode('ascii'))
-        f.write(binary_data)
-
+import os
+import sys
+
+import numpy as np
+import torch
+from diffusers.utils import load_image
+# pip install insightface==0.7.3
+from insightface.app import FaceAnalysis
+from insightface.data import get_image as ins_get_image
+from safetensors.torch import save_file
+
+### 
+# https://github.com/cubiq/ComfyUI_IPAdapter_plus/issues/165#issue-2055829543
+###
+class FaceAnalysis2(FaceAnalysis):
+    # NOTE: get() accepts a det_size per detection call, so one loaded model
+    # can be reused at several sizes instead of loading duplicates; the
+    # insightface wrapper does not expose this although the model allows it.
+    # det_size=None keeps whatever input_size the detector currently has.
+    def get(self, img, max_num=0, det_size=(640, 640)):
+        if det_size is not None:
+            self.det_model.input_size = det_size
+
+        return super().get(img, max_num)
+
+def analyze_faces(face_analysis: FaceAnalysis, img_data: np.ndarray, det_size=(640, 640)):  # NOTE(review): det_size is accepted but never read in this function
+    # Try the detector's current size first, then 640 down to 320 in steps of 64, then 256; return the first non-empty result, else []
+    detection_sizes = [None] + [(size, size) for size in range(640, 256, -64)] + [(256, 256)]  # None leaves input_size unchanged when face_analysis is a FaceAnalysis2
+
+    for size in detection_sizes:
+        faces = face_analysis.get(img_data, det_size=size)
+        if len(faces) > 0:
+            return faces
+
+    return []
+
+if __name__ == "__main__":
+    #face_detector = FaceAnalysis2(providers=['CUDAExecutionProvider'], allowed_modules=['detection', 'recognition'])
+    face_detector = FaceAnalysis2(providers=['CPUExecutionProvider'], allowed_modules=['detection', 'recognition'])
+    face_detector.prepare(ctx_id=0, det_size=(640, 640))
+    #input_folder_name = './scarletthead_woman'
+    input_folder_name = sys.argv[1]  # folder of input images, taken from the first command-line argument
+    image_basename_list = os.listdir(input_folder_name)  # NOTE(review): lists every entry, including an id_embeds.bin written by a previous run
+    image_path_list = sorted([os.path.join(input_folder_name, basename) for basename in image_basename_list])
+
+    input_id_images = []
+    for image_path in image_path_list:
+        input_id_images.append(load_image(image_path))
+    
+    id_embed_list = []
+    
+    for img in input_id_images:
+        img = np.array(img)
+        img = img[:, :, ::-1]  # reverse the last axis (presumably RGB -> BGR for insightface - confirm)
+        faces = analyze_faces(face_detector, img)
+        if len(faces) > 0:
+            id_embed_list.append(torch.from_numpy((faces[0]['embedding'])))  # only the first detected face of each image is used
+    
+    if len(id_embed_list) == 0:
+        raise ValueError(f"No face detected in input image pool")
+    
+    id_embeds = torch.stack(id_embed_list)  # one entry along dim 0 per image in which a face was found
+    
+    # for r in id_embeds:
+    #     print(r)
+    # #torch.save(id_embeds, input_folder_name+'/id_embeds.pt');
+    # weights = dict()
+    # weights["id_embeds"] = id_embeds
+    # save_file(weights, input_folder_name+'/id_embeds.safetensors')
+
+    binary_data = id_embeds.numpy().tobytes()
+    two = 4  # NOTE(review): the name says "two" but the value written to the header is 4
+    zero = 0
+    one = 1
+    tensor_name = "id_embeds"
+    # Layout written below: seven 4-byte little-endian integers, then the ASCII tensor name, then the raw tensor bytes
+    with open(input_folder_name+'/id_embeds.bin', "wb") as f:
+        f.write(two.to_bytes(4, byteorder='little'))
+        f.write((len(tensor_name)).to_bytes(4, byteorder='little'))
+        f.write(zero.to_bytes(4, byteorder='little'))
+        f.write((id_embeds.shape[1]).to_bytes(4, byteorder='little'))  # second dimension is written before the first
+        f.write((id_embeds.shape[0]).to_bytes(4, byteorder='little'))
+        f.write(one.to_bytes(4, byteorder='little'))
+        f.write(one.to_bytes(4, byteorder='little'))
+        f.write(tensor_name.encode('ascii'))
+        f.write(binary_data)
+