Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "ggml"]
path = ggml
url = https://github.com/ggml-org/ggml.git
url = https://github.com/leejet/ggml.git
[submodule "examples/server/frontend"]
path = examples/server/frontend
url = https://github.com/leejet/sdcpp-webui.git
Expand Down
4 changes: 4 additions & 0 deletions examples/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ add_executable(${TARGET}
image_metadata.cpp
main.cpp
)
target_include_directories(${TARGET} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/.."
"${PROJECT_SOURCE_DIR}/src"
)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
if(SD_WEBP)
Expand Down
1 change: 1 addition & 0 deletions examples/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ Generation Options:
--disable-auto-resize-ref-image disable auto resize of ref images
--disable-image-metadata do not embed generation metadata on image files
--vae-tiling process vae in tiles to reduce memory usage
--temporal-tiling enable temporal tiling for LTX video VAE decode
--hires enable highres fix
-s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m,
Expand Down
64 changes: 59 additions & 5 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,32 @@ std::string format_frame_idx(std::string pattern, int frame_idx) {
return result;
}

// Builds the path of the `.wav` sidecar file that accompanies the CLI output.
//
// The result is derived from `cli_params.output_path`:
//   * if the output has an extension that is recognised as a JPEG/PNG/WEBP
//     image (per `encoded_image_format_from_path`) or is `.avi` / `.webm`
//     (compared case-insensitively), that extension is stripped first;
//   * any other extension is left in place;
//   * `.wav` is then appended to whatever remains.
static fs::path get_video_audio_sidecar_path(const SDCliParams& cli_params) {
    const fs::path out_path = cli_params.output_path;
    // extension() already yields an empty path when there is no extension.
    const fs::path ext = out_path.extension();

    // Lower-case the extension for the container comparison below. The
    // character is routed through `unsigned char` because calling tolower()
    // with a negative `char` value (non-ASCII bytes) is undefined behaviour.
    std::string ext_lower = ext.string();
    std::transform(ext_lower.begin(), ext_lower.end(), ext_lower.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });

    const EncodedImageFormat output_format = encoded_image_format_from_path(out_path.string());

    const bool is_known_image = output_format == EncodedImageFormat::JPEG ||
                                output_format == EncodedImageFormat::PNG ||
                                output_format == EncodedImageFormat::WEBP;
    const bool is_known_video = ext_lower == ".avi" || ext_lower == ".webm";

    fs::path base_path = out_path;
    if (!ext.empty() && (is_known_image || is_known_video)) {
        // Drop the media extension so the sidecar shares the output's stem.
        base_path.replace_extension();
    }
    base_path += ".wav";
    return base_path;
}

bool save_results(const SDCliParams& cli_params,
const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
sd_image_t* results,
int num_results) {
int num_results,
const sd_audio_t* generated_audio = nullptr) {
if (results == nullptr || num_results <= 0) {
return false;
}
Expand Down Expand Up @@ -442,6 +463,21 @@ bool save_results(const SDCliParams& cli_params,
return ok;
};

auto write_audio_sidecar = [&](const fs::path& wav_path) {
if (generated_audio == nullptr) {
return;
}
if (write_wav_to_file(wav_path.string(),
generated_audio->data,
generated_audio->sample_count,
generated_audio->channels,
generated_audio->sample_rate)) {
LOG_INFO("save result audio to '%s'", wav_path.string().c_str());
} else {
LOG_WARN("failed to save result audio to '%s'", wav_path.string().c_str());
}
};

int sucessful_reults = 0;

if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
Expand All @@ -465,8 +501,16 @@ bool save_results(const SDCliParams& cli_params,
ext = ".avi";
fs::path video_path = base_path;
video_path += ext;
if (create_video_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
std::string final_ext_lower = ext.string();
std::transform(final_ext_lower.begin(), final_ext_lower.end(), final_ext_lower.begin(), ::tolower);
const bool mux_audio = generated_audio != nullptr && (final_ext_lower == ".avi" || final_ext_lower == ".webm");
if (create_video_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps, 90, mux_audio ? generated_audio : nullptr) == 0) {
LOG_INFO("save result video to '%s'", video_path.string().c_str());
if (generated_audio != nullptr && !mux_audio) {
fs::path wav_path = video_path;
wav_path.replace_extension(".wav");
write_audio_sidecar(wav_path);
}
return true;
} else {
LOG_ERROR("Failed to save result video to '%s'", video_path.string().c_str());
Expand All @@ -488,6 +532,9 @@ bool save_results(const SDCliParams& cli_params,
}
}
LOG_INFO("%d/%d images saved", sucessful_reults, num_results);
if (generated_audio != nullptr) {
write_audio_sidecar(get_video_audio_sidecar_path(cli_params));
}
return sucessful_reults != 0;
}

Expand Down Expand Up @@ -701,7 +748,8 @@ int main(int argc, const char* argv[]) {
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);

SDImageVec results;
int num_results = 0;
int num_results = 0;
sd_audio_t* generated_audio = nullptr;

if (cli_params.mode == UPSCALE) {
num_results = 1;
Expand Down Expand Up @@ -733,7 +781,10 @@ int main(int argc, const char* argv[]) {
results.adopt(generate_image(sd_ctx.get(), &img_gen_params), num_results);
} else if (cli_params.mode == VID_GEN) {
sd_vid_gen_params_t vid_gen_params = gen_params.to_sd_vid_gen_params_t();
sd_image_t* generated_video = generate_video(sd_ctx.get(), &vid_gen_params, &num_results);
sd_image_t* generated_video = nullptr;
if (!generate_video(sd_ctx.get(), &vid_gen_params, &generated_video, &num_results, &generated_audio)) {
generated_video = nullptr;
}
results.adopt(generated_video, num_results);
}

Expand Down Expand Up @@ -773,9 +824,12 @@ int main(int argc, const char* argv[]) {
}
}

if (!save_results(cli_params, ctx_params, gen_params, results.data(), num_results)) {
if (!save_results(cli_params, ctx_params, gen_params, results.data(), num_results, generated_audio)) {
free_sd_audio(generated_audio);
return 1;
}

free_sd_audio(generated_audio);

return 0;
}
22 changes: 22 additions & 0 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,18 @@ ArgOptions SDContextParams::get_options() {
"--high-noise-diffusion-model",
"path to the standalone high noise diffusion model",
&high_noise_diffusion_model_path},
{"",
"--embeddings-connectors",
"path to LTXAV embeddings connectors",
&embeddings_connectors_path},
{"",
"--vae",
"path to standalone vae model",
&vae_path},
{"",
"--audio-vae",
"path to standalone LTX audio vae model",
&audio_vae_path},
{"",
"--taesd",
"path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)",
Expand Down Expand Up @@ -661,7 +669,9 @@ std::string SDContextParams::to_string() const {
<< " llm_vision_path: \"" << llm_vision_path << "\",\n"
<< " diffusion_model_path: \"" << diffusion_model_path << "\",\n"
<< " high_noise_diffusion_model_path: \"" << high_noise_diffusion_model_path << "\",\n"
<< " embeddings_connectors_path: \"" << embeddings_connectors_path << "\",\n"
<< " vae_path: \"" << vae_path << "\",\n"
<< " audio_vae_path: \"" << audio_vae_path << "\",\n"
<< " taesd_path: \"" << taesd_path << "\",\n"
<< " esrgan_path: \"" << esrgan_path << "\",\n"
<< " control_net_path: \"" << control_net_path << "\",\n"
Expand Down Expand Up @@ -718,7 +728,9 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
llm_vision_path.c_str(),
diffusion_model_path.c_str(),
high_noise_diffusion_model_path.c_str(),
embeddings_connectors_path.c_str(),
vae_path.c_str(),
audio_vae_path.c_str(),
taesd_path.c_str(),
control_net_path.c_str(),
embedding_vec.data(),
Expand Down Expand Up @@ -990,6 +1002,11 @@ ArgOptions SDGenerationParams::get_options() {
"process vae in tiles to reduce memory usage",
true,
&vae_tiling_params.enabled},
{"",
"--temporal-tiling",
"enable temporal tiling for LTX video VAE decode",
true,
&vae_tiling_params.temporal_tiling},
{"",
"--hires",
"enable highres fix",
Expand Down Expand Up @@ -1682,6 +1699,9 @@ bool SDGenerationParams::from_json_str(
if (tiling_json.contains("enabled") && tiling_json["enabled"].is_boolean()) {
vae_tiling_params.enabled = tiling_json["enabled"];
}
if (tiling_json.contains("temporal_tiling") && tiling_json["temporal_tiling"].is_boolean()) {
vae_tiling_params.temporal_tiling = tiling_json["temporal_tiling"];
}
if (tiling_json.contains("tile_size_x") && tiling_json["tile_size_x"].is_number_integer()) {
vae_tiling_params.tile_size_x = tiling_json["tile_size_x"];
}
Expand Down Expand Up @@ -2187,6 +2207,7 @@ sd_vid_gen_params_t SDGenerationParams::to_sd_vid_gen_params_t() {
params.strength = strength;
params.seed = seed;
params.video_frames = video_frames;
params.fps = fps;
params.vace_strength = vace_strength;
params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params;
Expand Down Expand Up @@ -2275,6 +2296,7 @@ std::string SDGenerationParams::to_string() const {
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
<< " vae_tiling_params: { "
<< vae_tiling_params.enabled << ", "
<< vae_tiling_params.temporal_tiling << ", "
<< vae_tiling_params.tile_size_x << ", "
<< vae_tiling_params.tile_size_y << ", "
<< vae_tiling_params.target_overlap << ", "
Expand Down
4 changes: 3 additions & 1 deletion examples/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ struct SDContextParams {
std::string llm_vision_path;
std::string diffusion_model_path;
std::string high_noise_diffusion_model_path;
std::string embeddings_connectors_path;
std::string vae_path;
std::string audio_vae_path;
std::string taesd_path;
std::string esrgan_path;
std::string control_net_path;
Expand Down Expand Up @@ -183,7 +185,7 @@ struct SDGenerationParams {
int video_frames = 1;
int fps = 16;
float vace_strength = 1.f;
sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
sd_tiling_params_t vae_tiling_params = {false, false, 0, 0, 0.5f, 0.0f, 0.0f};

std::string pm_id_images_dir;
std::string pm_id_embed_path;
Expand Down
Loading
Loading