diff --git a/.gitignore b/.gitignore index 15f041bbf..a7b16344e 100644 --- a/.gitignore +++ b/.gitignore @@ -27,9 +27,12 @@ cuebot/.project /pycue/opencue/compiled_proto/ /rqd/rqd/compiled_proto/ docker-compose-local.yml +/sandbox/kafka* +/sandbox/zookeeper* docs/_site/ docs/bin/ sandbox/kafka-data sandbox/zookeeper-data sandbox/zookeeper-logs docs/_data/version.yml +target/* diff --git a/rust/.gitignore b/rust/.gitignore index 1b699df37..cc0adeff7 100644 --- a/rust/.gitignore +++ b/rust/.gitignore @@ -3,7 +3,12 @@ # TODO: Remove once these crates are stable and ready for public use /crates/cuebot-config /crates/dist-lock -/crates/scheduler .DS_Store config/rqd.local_docker.yaml +/sandbox/kafka* +/reference +Cargo.lock + +# Localized files only meant for building docker images locally +proto diff --git a/rust/AGENTS.md b/rust/AGENTS.md new file mode 100644 index 000000000..e75451a2b --- /dev/null +++ b/rust/AGENTS.md @@ -0,0 +1,175 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is the Rust implementation of OpenCue components - a render farm management system. 
The project consists of three main crates: + +- **rqd**: The main worker daemon that executes rendering tasks +- **dummy-cuebot**: A testing/development server for interacting with rqd +- **opencue-proto**: gRPC protocol definitions and generated code + +## Build and Development Commands + +### Prerequisites +```bash +# macOS +brew install protobuf + +# Ubuntu/Debian +sudo apt-get install protobuf-compiler +``` + +### Build Commands +```bash +# Build entire project (release mode - OS-specific) +cargo build -r + +# Build in debug mode (includes both Linux and macOS versions) +cargo build + +# Build specific crate +cargo build -p rqd +cargo build -p dummy-cuebot +cargo build -p opencue-proto + +# Run tests (unit tests only) +cargo test + +# Run all tests including integration tests (requires database setup) +cargo test --features integration-tests + +# Run only integration tests +cargo test --features integration-tests integration_tests + +# Run clippy linting +cargo clippy -- -D warnings + +# Format code +cargo fmt +``` + +### Running the System + +1. **Start dummy-cuebot report server:** +```bash +target/release/dummy-cuebot report-server +``` + +2. **Start RQD service:** +```bash +# With fake Linux environment simulation +env OPENCUE_RQD_CONFIG=config/rqd.fake_linux.yaml target/release/openrqd + +# With default config +target/release/openrqd +``` + +3. 
**Launch a test frame:** +```bash +target/release/dummy-cuebot rqd-client launch-frame crates/rqd/resources/test_scripts/memory_fork.sh +``` + +### Development Testing +```bash +# Run a single test +cargo test test_name + +# Run tests with output +cargo test -- --nocapture + +# Run tests for specific crate +cargo test -p rqd + +# Check logs for test frames +tail -f /tmp/rqd/test_job.test_frame.rqlog +``` + +## Architecture Overview + +### Core Components + +**MachineMonitor** (`crates/rqd/src/system/machine.rs`): +- Central orchestrator for system monitoring and resource management +- Manages CPU/GPU reservations and NIMBY (user activity detection) +- Handles process cleanup and zombie detection + +**FrameManager** (`crates/rqd/src/frame/manager.rs`): +- Manages frame lifecycle: validation, spawning, monitoring, cleanup +- Supports frame recovery after restarts via snapshot system +- Handles resource affinity and Docker containerization + +**ReportClient** (`crates/rqd/src/report/report_client.rs`): +- Handles communication with Cuebot server +- Implements retry logic with exponential backoff +- Supports endpoint rotation for high availability + +**RqdServant** (`crates/rqd/src/servant/rqd_servant.rs`): +- gRPC service implementation +- Handles incoming commands from Cuebot +- Delegates to appropriate managers + +### Key Architectural Patterns + +1. **Async/Await Throughout**: Full async architecture with Tokio runtime +2. **Resource Management**: Careful resource reservation and cleanup +3. **Platform Abstraction**: Separate Linux/macOS system implementations +4. **Configuration System**: YAML-based config with environment variable overrides +5. 
**Error Handling**: Uses `miette` for user-friendly error reporting + +### Configuration + +- **Default config location**: `~/.local/share/rqd.yaml` +- **Environment override**: `OPENCUE_RQD_CONFIG` environment variable +- **Environment prefix**: `OPENRQD_` for individual settings +- **Test config**: `config/rqd.fake_linux.yaml` for development + +### Frame Execution Flow + +1. **Validation**: Machine state, user permissions, resource availability +2. **Resource Reservation**: CPU cores and GPUs via CoreStateManager +3. **User Management**: Creates system users if needed +4. **Frame Spawning**: Launches in separate threads with optional Docker +5. **Monitoring**: Tracks execution, resource usage, process health +6. **Cleanup**: Releases resources and reports completion + +### Development Notes + +- **Resource Isolation**: Frames run in separate process groups +- **Container Support**: Optional Docker containerization via `containerized_frames` feature +- **Recovery System**: Restores running frames from snapshots after restarts +- **Kill Monitoring**: Tracks frame termination with forced kill capability +- **NIMBY Support**: Prevents frame execution when user is active + +### Important Files + +- `crates/rqd/src/main.rs`: RQD entry point and application setup +- `crates/rqd/src/config/config.rs`: Configuration structure definitions +- `crates/rqd/src/system/reservation.rs`: Resource reservation system +- `crates/dummy-cuebot/src/main.rs`: Testing server entry point +- `crates/opencue-proto/build.rs`: Protocol buffer build configuration + +### Platform-Specific Code + +- `crates/rqd/src/system/linux.rs`: Linux-specific system monitoring +- `crates/rqd/src/system/macos.rs`: macOS-specific system monitoring +- Build configuration automatically selects appropriate implementation + +### Logging and Debugging + +- **Log location**: Configurable via logging config +- **Log levels**: trace, debug, info, warn, error +- **Frame logs**: Individual frame execution logs in 
`/tmp/rqd/` +- **Structured logging**: Uses `tracing` crate for structured logging + +## Code Review and Standards + +### Rules + +- When reviewing code check: + - If all public methods are documented on their head comment + - Verify for all changed functions if the preexisting documentation needs to be updated + - Analyse possible race conditions introduced by the changes + - Evaluate the overall quality of the change taking into consideration rust standards + - Check for introduced panic conditions that are not properly documented diff --git a/rust/Cargo.lock b/rust/Cargo.lock deleted file mode 100644 index 1730e927e..000000000 --- a/rust/Cargo.lock +++ /dev/null @@ -1,3846 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler2" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" - -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - -[[package]] -name = 
"android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - -[[package]] -name = "anyhow" -version = "1.0.99" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" - -[[package]] -name = "arraydeque" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" - -[[package]] -name = "async-stream" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb6fa015ebe961e9908ca4c1854e7dc7aabd4417da77b6a0466e4dfb4c8f6f69" -dependencies = [ - "async-stream-impl", - "futures-core-preview", -] - -[[package]] -name = "async-stream-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f0d8c5b411e36dcfb04388bacfec54795726b1f0148adcb0f377a96d6747e0e" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "async-trait" -version = "0.1.88" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "axum" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" -dependencies = [ - "axum-core", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" -dependencies = [ - "bytes", - "futures-core", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - -[[package]] -name = "backtrace" -version = "0.3.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - -[[package]] -name = "backtrace-ext" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" -dependencies = [ - "backtrace", -] - -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" -dependencies = [ - "serde", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "bollard" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ccca1260af6a459d75994ad5acc1651bcabcbdbc41467cc9786519ab854c30" -dependencies = [ - "base64 0.22.1", - "bollard-stubs", - "bytes", - "futures-core", - "futures-util", - "hex", - "http", - "http-body-util", - "hyper", - "hyper-named-pipe", - "hyper-util", - "hyperlocal", - "log", - "pin-project-lite", - "serde", - "serde_derive", - "serde_json", - "serde_repr", - "serde_urlencoded", - "thiserror 2.0.12", - "tokio", - "tokio-util", - 
"tower-service", - "url", - "winapi", -] - -[[package]] -name = "bollard-stubs" -version = "1.47.1-rc.27.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f179cfbddb6e77a5472703d4b30436bff32929c0aa8a9008ecf23d1d3cdd0da" -dependencies = [ - "serde", - "serde_repr", - "serde_with", -] - -[[package]] -name = "bumpalo" -version = "3.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" - -[[package]] -name = "bytesize" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e93abca9e28e0a1b9877922aacb20576e05d4679ffa78c3d6dc22a26a216659" -dependencies = [ - "serde", -] - -[[package]] -name = "cc" -version = "1.2.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" -dependencies = [ - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" - -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - -[[package]] -name = "chrono" -version = "0.4.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" 
-dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim", - "textwrap 0.11.0", - "unicode-width 0.1.14", - "vec_map", -] - -[[package]] -name = "config" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68578f196d2a33ff61b27fae256c3164f65e36382648e30666dde05b8cc9dfdf" -dependencies = [ - "async-trait", - "convert_case", - "json5", - "nom", - "pathdiff", - "ron", - "rust-ini", - "serde", - "serde_json", - "toml", - "yaml-rust2", -] - -[[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom 0.2.16", - "once_cell", - "tiny-keccak", -] - -[[package]] -name = "convert_case" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "cookie" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" -dependencies = [ - "percent-encoding", - "time", - "version_check", -] - -[[package]] -name = "cookie_store" -version = "0.22.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fc4bff745c9b4c7fb1e97b25d13153da2bc7796260141df62378998d070207f" -dependencies = [ - "cookie", - "document-features", - "idna", - "indexmap 2.10.0", - "log", - "serde", - "serde_derive", - "serde_json", - "time", - "url", -] - -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies 
= [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "deranged" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" -dependencies = [ - "powerfmt", - "serde", -] - -[[package]] -name = "device_query" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eba6a7f3347c8bc8a4d194b9d8517e3a6b8fe1929ee91aaccf0be278b980033c" -dependencies = [ - "macos-accessibility-client", - "pkg-config", - "readkey", - "readmouse", - "windows 0.48.0", - "x11", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "dlv-list" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" -dependencies = [ - "const-random", -] - -[[package]] -name = "document-features" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d" -dependencies = [ - "litrs", -] - -[[package]] -name = "dummy-cuebot" -version = "0.1.5" -dependencies = [ - "async-trait", - "config", - "futures", - "itertools 0.13.0", - "miette", - "once_cell", - "opencue-proto", - "prost", - "serde", - "serde_derive", - "serde_json", - "structopt", - "tempfile", - "thiserror 1.0.69", - "tokio", - "tokio-postgres", - "tonic", - "users", - "uuid", -] - -[[package]] -name = "dyn-clone" -version = "1.0.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "errno" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" -dependencies = [ - "libc", - "windows-sys 0.60.2", -] - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - -[[package]] -name = "flate2" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = "futures-core-preview" -version = "0.3.0-alpha.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35b6263fb1ef523c3056565fa67b1d16f0a8604ff12b11b08c25f28a734c60a" - -[[package]] -name = "futures-executor" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" - -[[package]] -name = "futures-macro" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "futures-sink" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" - -[[package]] -name = "futures-task" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" - -[[package]] -name = "futures-util" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" -dependencies = [ - "futures-channel", - "futures-core", - 
"futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - -[[package]] -name = "h2" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http", - "indexmap 2.10.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.14.5" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "hashbrown" -version = "0.15.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" - -[[package]] -name = "hashlink" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" -dependencies = [ - "hashbrown 0.14.5", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "http" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" -dependencies = [ - "bytes", - "fnv", - 
"itoa", -] - -[[package]] -name = "http-body" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" -dependencies = [ - "bytes", - "http", -] - -[[package]] -name = "http-body-util" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" -dependencies = [ - "bytes", - "futures-core", - "http", - "http-body", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "humantime" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" - -[[package]] -name = "humantime-serde" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a3db5ea5923d99402c94e9feb261dc5ee9b4efa158b0315f788cf549cc200c" -dependencies = [ - "humantime", - "serde", -] - -[[package]] -name = "hyper" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-named-pipe" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" -dependencies = [ - "hex", - "hyper", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", - "winapi", -] - -[[package]] -name = "hyper-timeout" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" -dependencies = [ - "hyper", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", -] - -[[package]] -name = "hyper-util" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "http", - "http-body", - "hyper", - "libc", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", -] - -[[package]] -name = "hyperlocal" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" -dependencies = [ - "hex", - "http-body-util", - "hyper", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core 0.61.2", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "icu_collections" -version = "2.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" - -[[package]] -name = "icu_properties" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "potential_utf", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" - -[[package]] -name = "icu_provider" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" -dependencies = [ - "displaydoc", - "icu_locale_core", - "stable_deref_trait", - "tinystr", - 
"writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", - "serde", -] - -[[package]] -name = "indexmap" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" -dependencies = [ - "equivalent", - "hashbrown 0.15.4", - "serde", -] - -[[package]] -name = "io-uring" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" -dependencies = [ - "bitflags 2.9.1", - "cfg-if", - "libc", -] - -[[package]] -name = "ipnetwork" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf466541e9d546596ee94f9f69590f89473455f88372423e0008fc1a7daf100e" -dependencies = [ - "serde", -] - -[[package]] -name = "is_ci" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - -[[package]] -name = "js-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "json5" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" -dependencies = [ - "pest", - "pest_derive", - "serde", -] - -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - -[[package]] -name = "libc" -version = "0.2.174" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" - -[[package]] -name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - -[[package]] -name = "litemap" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" - -[[package]] -name = "litrs" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed" - -[[package]] -name = "lock_api" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" - -[[package]] -name = "macos-accessibility-client" -version = "0.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edf7710fbff50c24124331760978fb9086d6de6288dcdb38b25a97f8b1bdebbb" -dependencies = [ - "core-foundation", - "core-foundation-sys", -] - -[[package]] -name = "matchit" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" - -[[package]] -name = "miette" -version = "7.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7" -dependencies = [ - "backtrace", - "backtrace-ext", - "cfg-if", - "miette-derive", - "owo-colors", - "supports-color", - "supports-hyperlinks", - "supports-unicode", - "terminal_size", - "textwrap 0.16.2", - "unicode-width 0.1.14", -] - -[[package]] -name = "miette-derive" -version = "7.6.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" -dependencies = [ - "adler2", -] - -[[package]] -name = "mio" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" -dependencies = [ - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", -] - -[[package]] -name = "multimap" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" - -[[package]] -name = "nix" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" -dependencies = [ - "bitflags 2.9.1", - "cfg-if", - "cfg_aliases", - "libc", -] - -[[package]] -name = "no-std-net" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "ntapi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] - -[[package]] -name = "nu-ansi-term" -version = "0.50.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "opencue-proto" -version = "0.1.5" -dependencies = [ - "prost", - "prost-types", - "rand 0.8.5", - "rmp", - "rmp-serde", - "serde", - "serde_derive", - "tonic", - "tonic-build", - "uuid", - "whoami", -] - -[[package]] -name = "ordered-multimap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" -dependencies = [ - "dlv-list", - "hashbrown 0.14.5", -] - -[[package]] -name = 
"owo-colors" -version = "4.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" - -[[package]] -name = "parking_lot" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.52.6", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pathdiff" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" - -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - -[[package]] -name = "pest" -version = "2.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" -dependencies = [ - "memchr", - "thiserror 2.0.12", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb056d9e8ea77922845ec74a1c4e8fb17e7c218cc4fc11a15c5d25e189aa40bc" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.8.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e404e638f781eb3202dc82db6760c8ae8a1eeef7fb3fa8264b2ef280504966" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "pest_meta" -version = "2.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd1101f170f5903fde0914f899bb503d9ff5271d7ba76bbb70bea63690cc0d5" -dependencies = [ - "pest", - "sha2", -] - -[[package]] -name = "petgraph" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" -dependencies = [ - "fixedbitset", - "indexmap 2.10.0", -] - -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version 
= "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - -[[package]] -name = "pnet" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "682396b533413cc2e009fbb48aadf93619a149d3e57defba19ff50ce0201bd0d" -dependencies = [ - "ipnetwork", - "pnet_base", - "pnet_datalink", - "pnet_packet", - "pnet_sys", - "pnet_transport", -] - -[[package]] -name = "pnet_base" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc190d4067df16af3aba49b3b74c469e611cad6314676eaf1157f31aa0fb2f7" -dependencies = [ - "no-std-net", -] - -[[package]] -name = "pnet_datalink" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79e70ec0be163102a332e1d2d5586d362ad76b01cec86f830241f2b6452a7b7" -dependencies = [ - "ipnetwork", - "libc", - "pnet_base", - "pnet_sys", - "winapi", -] - -[[package]] -name = "pnet_macros" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13325ac86ee1a80a480b0bc8e3d30c25d133616112bb16e86f712dcf8a71c863" -dependencies = [ - "proc-macro2", - "quote", - "regex", - "syn 2.0.104", -] - -[[package]] -name = "pnet_macros_support" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed67a952585d509dd0003049b1fc56b982ac665c8299b124b90ea2bdb3134ab" -dependencies = [ - "pnet_base", -] - -[[package]] -name = "pnet_packet" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c96ebadfab635fcc23036ba30a7d33a80c39e8461b8bd7dc7bb186acb96560f" -dependencies = [ - "glob", - 
"pnet_base", - "pnet_macros", - "pnet_macros_support", -] - -[[package]] -name = "pnet_sys" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d4643d3d4db6b08741050c2f3afa9a892c4244c085a72fcda93c9c2c9a00f4b" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "pnet_transport" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f604d98bc2a6591cf719b58d3203fd882bdd6bf1db696c4ac97978e9f4776bf" -dependencies = [ - "libc", - "pnet_base", - "pnet_packet", - "pnet_sys", -] - -[[package]] -name = "postgres-protocol" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ff0abab4a9b844b93ef7b81f1efc0a366062aaef2cd702c76256b5dc075c54" -dependencies = [ - "base64 0.22.1", - "byteorder", - "bytes", - "fallible-iterator", - "hmac", - "md-5", - "memchr", - "rand 0.9.1", - "sha2", - "stringprep", -] - -[[package]] -name = "postgres-types" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613283563cd90e1dfc3518d548caee47e0e725455ed619881f5cf21f36de4b48" -dependencies = [ - "bytes", - "fallible-iterator", - "postgres-protocol", -] - -[[package]] -name = "potential_utf" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" -dependencies = [ - "zerovec", -] - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "prettyplease" -version = "0.2.35" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" -dependencies = [ - "proc-macro2", - "syn 2.0.104", -] - -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" -dependencies = [ - "heck 0.5.0", - "itertools 0.14.0", - "log", - "multimap", - "once_cell", - "petgraph", - "prettyplease", - "prost", - "prost-types", - "regex", - "syn 2.0.104", - "tempfile", -] - -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn 
2.0.104", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", -] - 
-[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom 0.3.3", -] - -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "readkey" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a36870cefdfcff57edbc0fa62165f42dfd4e5a0d8965117c1ea84c5700e4450" - -[[package]] -name = "readmouse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be105c72a1e6a5a1198acee3d5b506a15676b74a02ecd78060042a447f408d94" - -[[package]] -name = "redox_syscall" -version = "0.5.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" -dependencies = [ - "bitflags 2.9.1", -] - -[[package]] -name = "ref-cast" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "regex" -version = "1.11.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" - -[[package]] -name = "ring" -version = "0.17.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" -dependencies = [ - "cc", - "cfg-if", - "getrandom 0.2.16", - "libc", - "untrusted", - "windows-sys 0.52.0", -] - -[[package]] -name = "rmp" -version = "0.8.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" -dependencies = [ - "byteorder", - "num-traits", - "paste", -] - -[[package]] -name = "rmp-serde" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - -[[package]] -name = "ron" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" -dependencies = [ - "base64 0.21.7", - "bitflags 2.9.1", - "serde", - "serde_derive", -] - -[[package]] -name = "rqd" -version = "0.1.5" -dependencies = [ - "async-stream", - "async-trait", - "bincode", - "bollard", - "bytesize", - "chrono", - "config", - 
"dashmap", - "device_query", - "futures", - "futures-core", - "http", - "http-body", - "http-body-util", - "humantime", - "humantime-serde", - "itertools 0.13.0", - "lazy_static", - "libc", - "log", - "miette", - "nix", - "opencue-proto", - "pin-project-lite", - "pnet", - "prost", - "rand 0.9.1", - "regex", - "serde", - "serde_derive", - "serde_json", - "sysinfo", - "tempfile", - "thiserror 1.0.69", - "tokio", - "tonic", - "tower", - "tower-layer", - "tower-service", - "tracing", - "tracing-appender", - "tracing-rolling-file", - "tracing-subscriber", - "ureq", - "users", - "uuid", -] - -[[package]] -name = "rust-ini" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0698206bcb8882bf2a9ecb4c1e7785db57ff052297085a6efd4fe42302068a" -dependencies = [ - "cfg-if", - "ordered-multimap", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" - -[[package]] -name = "rustix" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" -dependencies = [ - "bitflags 2.9.1", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustls" -version = "0.23.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" -dependencies = [ - "log", - "once_cell", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - -[[package]] -name = "rustls-pki-types" -version = "1.12.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" -dependencies = [ - "zeroize", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - -[[package]] -name = "rustversion" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" - -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - -[[package]] -name = "schemars" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - -[[package]] -name = "schemars" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "serde_repr" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "serde_spanned" -version = "0.6.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" -dependencies = [ - "serde", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_with" -version = "3.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" -dependencies = [ - "base64 0.22.1", - "chrono", - "hex", - "indexmap 1.9.3", - "indexmap 2.10.0", - "schemars 0.9.0", - "schemars 1.0.4", - "serde", - "serde_derive", - "serde_json", - "time", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "signal-hook-registry" -version = "1.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" -dependencies = [ - "libc", -] - -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - -[[package]] -name = "slab" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "socket2" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" -dependencies = [ - "libc", - "windows-sys 0.59.0", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "stringprep" -version = "0.1.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "structopt" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap", - "lazy_static", - "structopt-derive", -] - -[[package]] -name = "structopt-derive" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" -dependencies = [ - "heck 0.3.3", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - -[[package]] -name = "supports-color" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" -dependencies = [ - "is_ci", -] - -[[package]] -name = "supports-hyperlinks" -version = "3.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804f44ed3c63152de6a9f90acbea1a110441de43006ea51bcce8f436196a288b" - -[[package]] -name = "supports-unicode" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2" - -[[package]] -name = "syn" -version = "1.0.109" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "sync_wrapper" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "sysinfo" -version = "0.33.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc858248ea01b66f19d8e8a6d55f41deaf91e9d495246fd01368d99935c6c01" -dependencies = [ - "core-foundation-sys", - "libc", - "memchr", - "ntapi", - "rayon", - "windows 0.57.0", -] - -[[package]] -name = "tempfile" -version = "3.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" -dependencies = [ - "fastrand", - "getrandom 0.3.3", - "once_cell", - "rustix", - "windows-sys 0.59.0", -] - -[[package]] -name = "terminal_size" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" -dependencies = [ - "rustix", - "windows-sys 0.59.0", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width 0.1.14", -] - -[[package]] -name = "textwrap" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" -dependencies = [ - "unicode-linebreak", - "unicode-width 0.2.1", -] - -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - -[[package]] -name = "thiserror" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" -dependencies = [ - "thiserror-impl 2.0.12", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "thread_local" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "time" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" -dependencies = [ - "deranged", - "itoa", - "num-conv", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - 
-[[package]] -name = "time-core" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" - -[[package]] -name = "time-macros" -version = "0.2.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinystr" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" -dependencies = [ - "displaydoc", - "zerovec", -] - -[[package]] -name = "tinyvec" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.47.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" -dependencies = [ - "backtrace", - "bytes", - "io-uring", - "libc", - "mio", - "parking_lot", - "pin-project-lite", - "signal-hook-registry", - "slab", - "socket2 0.6.0", - "tokio-macros", - "windows-sys 0.59.0", -] - -[[package]] -name = "tokio-macros" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "tokio-postgres" -version = "0.7.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c95d533c83082bb6490e0189acaa0bbeef9084e60471b696ca6988cd0541fb0" -dependencies = [ - "async-trait", - "byteorder", - "bytes", - "fallible-iterator", - "futures-channel", - "futures-util", - "log", - "parking_lot", - "percent-encoding", - "phf", - "pin-project-lite", - "postgres-protocol", - "postgres-types", - "rand 0.9.1", - "socket2 0.5.10", - "tokio", - "tokio-util", - "whoami", -] - -[[package]] -name = "tokio-stream" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "toml" -version = "0.8.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", -] - -[[package]] -name = "toml_datetime" -version = "0.6.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" -dependencies = [ - "serde", -] - -[[package]] -name = "toml_edit" -version = "0.22.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" -dependencies = [ - 
"indexmap 2.10.0", - "serde", - "serde_spanned", - "toml_datetime", - "toml_write", - "winnow", -] - -[[package]] -name = "toml_write" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" - -[[package]] -name = "tonic" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" -dependencies = [ - "async-trait", - "axum", - "base64 0.22.1", - "bytes", - "h2", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-timeout", - "hyper-util", - "percent-encoding", - "pin-project", - "prost", - "socket2 0.5.10", - "tokio", - "tokio-stream", - "tower", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tonic-build" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" -dependencies = [ - "prettyplease", - "proc-macro2", - "prost-build", - "prost-types", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "tower" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 2.10.0", - "pin-project-lite", - "slab", - "sync_wrapper", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - -[[package]] -name 
= "tracing" -version = "0.1.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-appender" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" -dependencies = [ - "crossbeam-channel", - "thiserror 1.0.69", - "time", - "tracing-subscriber", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "tracing-core" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-rolling-file" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf73ffe536cc623d6a101a3acb6ea7b5db28af8fca9709e3a8f8bce722cd16" -dependencies = [ - "chrono", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" -dependencies = [ - "nu-ansi-term", - "sharded-slab", - "smallvec", - "thread_local", - "tracing-core", - "tracing-log", -] - -[[package]] -name = "try-lock" 
-version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unicode-linebreak" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" - -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-properties" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" - -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - -[[package]] -name = "unicode-width" -version = "0.1.14" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" - -[[package]] -name = "unicode-width" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" - -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - -[[package]] -name = "ureq" -version = "3.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00432f493971db5d8e47a65aeb3b02f8226b9b11f1450ff86bb772776ebadd70" -dependencies = [ - "base64 0.22.1", - "cookie_store", - "flate2", - "log", - "percent-encoding", - "rustls", - "rustls-pemfile", - "rustls-pki-types", - "serde", - "serde_json", - "ureq-proto", - "utf-8", - "webpki-roots", -] - -[[package]] -name = "ureq-proto" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5b6cabebbecc4c45189ab06b52f956206cea7d8c8a20851c35a85cb169224cc" -dependencies = [ - "base64 0.22.1", - "http", - "httparse", - "log", -] - -[[package]] -name = "url" -version = "2.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "users" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24cc0f6d6f267b73e5a2cadf007ba8f9bc39c6a6f9666f8cf25ea809a153b032" -dependencies = [ - "libc", - "log", -] - -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - -[[package]] -name = 
"utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - -[[package]] -name = "uuid" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" -dependencies = [ - "getrandom 0.3.3", - "js-sys", - "serde", - "wasm-bindgen", -] - -[[package]] -name = "valuable" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - -[[package]] -name = "wasite" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" - -[[package]] -name 
= "wasm-bindgen" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.104", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "webpki-roots" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" -dependencies = [ - "rustls-pki-types", -] - -[[package]] -name 
= "whoami" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" -dependencies = [ - "redox_syscall", - "wasite", - "web-sys", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows" -version = "0.57.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" -dependencies = [ - "windows-core 0.57.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.57.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" -dependencies = [ - "windows-implement 0.57.0", - "windows-interface 0.57.0", - "windows-result 0.1.2", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" -dependencies = [ - "windows-implement 0.60.0", - "windows-interface 0.59.1", - "windows-link", - "windows-result 0.3.4", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.57.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "windows-implement" -version = "0.60.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "windows-interface" -version = "0.57.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "windows-interface" -version = "0.59.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - -[[package]] -name = "windows-result" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-result" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" 
-dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.2", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] 
- -[[package]] -name = "windows-targets" -version = "0.53.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" -dependencies = [ - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 
- -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_i686_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - -[[package]] -name = "winnow" -version = "0.7.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" -dependencies = [ - "memchr", -] - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags 2.9.1", -] - -[[package]] -name = "writeable" -version = "0.6.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" - -[[package]] -name = "x11" -version = "2.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "502da5464ccd04011667b11c435cb992822c2c0dbde1770c988480d312a0db2e" -dependencies = [ - "libc", - "pkg-config", -] - -[[package]] -name = "yaml-rust2" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8902160c4e6f2fb145dbe9d6760a75e3c9522d8bf796ed7047c85919ac7115f8" -dependencies = [ - "arraydeque", - "encoding_rs", - "hashlink", -] - -[[package]] -name = "yoke" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - 
-[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", - "synstructure", -] - -[[package]] -name = "zeroize" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" - -[[package]] -name = "zerotrie" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 76b809ca6..db1332b16 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,11 +1,16 @@ [workspace] -members = ["crates/opencue-proto", "crates/rqd", "crates/dummy-cuebot"] -resolver = "3" +members = [ + "crates/opencue-proto", + "crates/rqd", + "crates/dummy-cuebot", + "crates/scheduler", +] +resolver = "2" [workspace.package] authors = ["Diego Tavares "] -edition = "2024" -version = "0.1.6" +edition = "2021" +version = "0.1.3" [workspace.dependencies] async-trait = "0.1" @@ -22,9 +27,11 @@ thiserror = "1.0" uuid = { version = "1.8.0", features = ["v4"] } whoami = "1.5.1" tokio = { version = "1.45", features = ["full"] } +tokio-stream = 
"0.1" miette = { version = "7.2.0", features = ["fancy"] } regex = "1.5.4" tracing = "0.1.40" tracing-appender = "0.2.3" tracing-rolling-file = "0.1.2" -tracing-subscriber = "0.3.20" +tracing-subscriber = { version = "0.3.18", features = ["ansi", "env-filter"] } +structopt = "0.3.26" diff --git a/rust/Dockerfile.scheduler b/rust/Dockerfile.scheduler new file mode 100644 index 000000000..cae05d7d6 --- /dev/null +++ b/rust/Dockerfile.scheduler @@ -0,0 +1,65 @@ +# Multi-stage Dockerfile for OpenCue Scheduler + +# Build stage +FROM rust:1.88-bookworm AS builder + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + protobuf-compiler \ + libprotobuf-dev \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +# Create app directory +WORKDIR /build + +# Copy workspace files +COPY Cargo.toml Cargo.lock ./ + +# Copy all crate directories +COPY crates ./crates + +# Remove symlink +RUN rm ./crates/opencue-proto/src/protos + +# Copy proto files +COPY proto ./crates/opencue-proto/src/protos + +# Build the scheduler in release mode +RUN cargo build --release -p scheduler + +# Runtime stage +FROM debian:bookworm-slim + +# Install runtime dependencies +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Create app user +RUN useradd -m -u 1000 -s /bin/bash scheduler + +# Create necessary directories +RUN mkdir -p /app/resources && chown -R scheduler:scheduler /app + +WORKDIR /app + +# Copy the built binary from builder stage +COPY --from=builder /build/target/release/cue-scheduler /app/cue-scheduler + +# Copy resources (schema file) +COPY --from=builder /build/crates/scheduler/resources /app/resources + +# Switch to non-root user +USER scheduler + +# Set debug environment variables when needed +# ENV RUST_LOG=info +# ENV RUST_BACKTRACE=1 + +# Container should mount the config file to this directory +ENV OPENCUE_SCHEDULER_CONFIG=/etc/cue-scheduler/rqd.yaml + +# Run the scheduler +ENTRYPOINT ["/app/cue-scheduler"] 
+CMD [] diff --git a/rust/config/rqd.fake_linux.yaml b/rust/config/rqd.dummy.cuebot.yaml similarity index 89% rename from rust/config/rqd.fake_linux.yaml rename to rust/config/rqd.dummy.cuebot.yaml index 4083bcc92..a4e2544c0 100644 --- a/rust/config/rqd.fake_linux.yaml +++ b/rust/config/rqd.dummy.cuebot.yaml @@ -2,7 +2,8 @@ logging: level: info grpc: rqd_port: 8444 - cuebot_endpoints: ["0.0.0.0:4343", "0.0.0.0:4343"] + # cuebot_endpoints: ["0.0.0.0:4343", "0.0.0.0:4343"] + cuebot_endpoints: ["localhost:8443"] connection_expires_after: 15m machine: # nimby_mode: true @@ -14,7 +15,7 @@ machine: distro_release_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/distro-release/rocky" proc_stat_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/proc/stat" proc_loadavg_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/proc/loadavg" - temp_path: "/tmp" + temp_path: "/Users/dtavares/tmp" use_session_id_for_proc_lineage: true runner: snapshots_path: $HOME/.rqd/snapshots diff --git a/rust/config/rqd.local.cuebot.yaml b/rust/config/rqd.local.cuebot.yaml new file mode 100644 index 000000000..a4e2544c0 --- /dev/null +++ b/rust/config/rqd.local.cuebot.yaml @@ -0,0 +1,41 @@ +logging: + level: info +grpc: + rqd_port: 8444 + # cuebot_endpoints: ["0.0.0.0:4343", "0.0.0.0:4343"] + cuebot_endpoints: ["localhost:8443"] + connection_expires_after: 15m +machine: + # nimby_mode: true + # nimby_idle_threshold: 60s + worker_threads: 8 + facility: test + monitor_interval: 3s + cpuinfo_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/cpuinfo/cpuinfo_srdsvr09_48-12-4" + distro_release_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/distro-release/rocky" + proc_stat_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/proc/stat" + proc_loadavg_path: "/Users/dtavares/dev/rust_opencue/crates/rqd/resources/proc/loadavg" + temp_path: "/Users/dtavares/tmp" + use_session_id_for_proc_lineage: true +runner: + snapshots_path: 
$HOME/.rqd/snapshots + kill_monitor_interval: 10s + kill_monitor_timeout: 60s + force_kill_after_timeout: true + docker.mounts: + - target: "" + source: "" + typ: "" + bind-propagation: "" + docker.images: + rhel7: "centos7:latest" + rocky9: "rockyimage:latest" +# monitor_interval_seconds: 3 +# use_ip_as_hostname: false +# nimby_mode: false +# override_real_values: +# cores: 4 +# procs: 8 +# memory: "2Gb" +# desktop_mode: true +# hostname: "some_host_name" diff --git a/rust/config/scheduler.yaml b/rust/config/scheduler.yaml new file mode 100644 index 000000000..438755506 --- /dev/null +++ b/rust/config/scheduler.yaml @@ -0,0 +1,217 @@ +# ============================================================================= +# LOGGING CONFIGURATION +# ============================================================================= +logging: + # Logging level for scheduler output + # Uses tracing EnvFilter format. + # See https://docs.rs/tracing-subscriber/0.2.19/tracing_subscriber/filter/struct.EnvFilter.html + # Default: "debug:sqlx=info" + level: info,sqlx=warn + + # Path to the log file if file_appender is enabled + # Default: "/opt/rqd/logs/scheduler.log" + # path: /opt/rqd/logs/scheduler.log + + # Log to file if true, stdout if false + # Default: false + # file_appender: false + +# ============================================================================= +# DATABASE CONFIGURATION +# ============================================================================= +database: + # Database connection pool size + # Default: 20 + # pool_size: 20 + + # Database host + # Default: "localhost" + db_host: localhost + + # Database name + # Default: "test" + db_name: cuebot + + # Database user + # Default: "postgres" + db_user: cuebot + + # Database password + # Default: "password" + db_pass: cuebot_password + + # Database port + # Default: 5432 + db_port: 5432 + + # Core multiplier for resource calculations + # Default: 100 + # core_multiplier: 100 + +# 
============================================================================= +# RQD CONFIGURATION +# ============================================================================= +rqd: + # gRPC port for RQD communication + # Default: 8444 + # grpc_port: 8444 + + # If true, runs in dry-run mode without executing actual frame dispatches + # Default: false + dry_run_mode: true + +# ============================================================================= +# QUEUE CONFIGURATION +# ============================================================================= +queue: + # Interval between queue monitoring cycles + # Default: 5s + # monitor_interval: 5s + + # Number of worker threads for processing + # Default: 4 + # worker_threads: 4 + + # Maximum number of frames to dispatch per layer per cycle + # Default: 20 + # dispatch_frames_per_layer_limit: 20 + + # Core multiplier for resource calculations + # Default: 100 + # core_multiplier: 100 + + # Memory threshold below which a host is considered stranded + # Default: 2GiB + # memory_stranded_threshold: 2GiB + + # Duration to back off a job after dispatch failures + # Default: 300s (5 minutes) + # job_back_off_duration: 300s + + # Chunk size for processing manual tags + # Default: 100 + # manual_tags_chunk_size: 100 + + # Chunk size for processing hostname tags + # Default: 300 + # hostname_tags_chunk_size: 300 + + # Maximum number of host candidate attempts per layer + # Default: 10 + # host_candidate_attemps_per_layer: 10 + + # Number of empty job cycles before scheduler quits (None = run forever) + # Default: None + # empty_job_cycles_before_quiting: 10 + + # Minimum memory reserved per host + # Default: 250MiB + # mem_reserved_min: 250MiB + + # Interval to refresh allocation data + # Default: 3s + # allocation_refresh_interval: 3s + + # List of services that are selfish (require exclusive host access) + # Default: [] + # selfish_services: + # - service1 + # - service2 + + # Stream configuration + # stream: + # # 
Buffer size for cluster stream processing + # # Default: 3 + # cluster_buffer_size: 3 + # + # # Buffer size for job stream processing + # # Default: 3 + # job_buffer_size: 3 + + # Host booking strategy configuration + # host_booking_strategy: + # # Enable core saturation booking + # # Default: true + # core_saturation: true + # + # # Enable memory saturation booking + # # Default: false + # memory_saturation: false + + # Soft memory limit multiplier for frame memory requirements + # Used as a threshold to determine if a frame can be dispatched based on available memory + # Multiplied by the frame's memory requirement to calculate the soft limit + # Default: 1.6 + # frame_memory_soft_limit: 1.6 + + # Hard memory limit multiplier for frame memory requirements + # Used as a maximum threshold for frame memory allocation + # Multiplied by the frame's memory requirement to calculate the hard limit + # Default: 2.0 + # frame_memory_hard_limit: 2.0 + +# ============================================================================= +# HOST CACHE CONFIGURATION +# ============================================================================= +host_cache: + # Number of concurrent host groups to process + # Default: 3 + # concurrent_groups: 3 + + # Memory divisor for grouping hosts by memory size + # Default: 2GiB + # memory_key_divisor: 2GiB + + # Timeout for checking out hosts from cache + # Default: 12s + # checkout_timeout: 12s + + # Interval for monitoring host cache + # Default: 1s + # monitoring_interval: 1s + + # Idle timeout before a group is evicted from cache + # Default: 10800s (3 hours) + # group_idle_timeout: 10800s + + # Number of concurrent fetch permits + # Default: 4 + # concurrent_fetch_permit: 4 + + # Enable host stats update. + # On a production environment, cuebot is responsible for updating the + # host_stats table. + # On tests environmnts, not updating the table would lead to invalid + # memory information. 
Currently this toggle should only be enabled on + # test environments (stress_tests.rs) + # Default: false + # update_stat_on_book: false + +# ============================================================================= +# SCHEDULER CONFIGURATION +# ============================================================================= +scheduler: + # Optional facility code to run on (can be overridden with --facility flag) + # Default: None + # facility: eat + + # List of allocation tags in show:tag format (can be overridden with --alloc_tags flag) + # Default: [] + # alloc_tags: + # - show: show1 + # tag: general + # - show: show2 + # tag: priority + + # List of manual tags not associated with an allocation (can be overridden with --manual_tags flag) + # Default: [] + # manual_tags: + # - tag1 + # - tag2 + + # List of tags to ignore when loading clusters (can be overridden with --ignore_tags flag) + # These tags will be filtered out from all cluster types (manual, hostname, and alloc) + # Default: [] + # ignore_tags: + # - tag_to_ignore1 + # - tag_to_ignore2 diff --git a/rust/crates/dummy-cuebot/Cargo.toml b/rust/crates/dummy-cuebot/Cargo.toml index 467f958a1..5be2a7f41 100644 --- a/rust/crates/dummy-cuebot/Cargo.toml +++ b/rust/crates/dummy-cuebot/Cargo.toml @@ -29,7 +29,7 @@ tokio = { workspace = true } tokio-postgres = "0.7.12" tonic = { workspace = true } users = "0.11" -structopt = "0.3.26" +structopt = { workspace = true } [dev-dependencies] tempfile = "3.14.0" diff --git a/rust/crates/rqd/resources/test_scripts/memory_hungry.sh b/rust/crates/rqd/resources/test_scripts/memory_hungry.sh new file mode 100755 index 000000000..33c45aeb4 --- /dev/null +++ b/rust/crates/rqd/resources/test_scripts/memory_hungry.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# Check if memory argument is provided +if [ $# -eq 0 ]; then + echo "Usage: $0 " + echo "Example: $0 2.5 # Allocates approximately 2.5 GB" + exit 1 +fi + +MEMORY_GB=$1 + +# Validate that the argument is a number +if ! 
[[ "$MEMORY_GB" =~ ^[0-9]+\.?[0-9]*$ ]]; then + echo "Error: Memory argument must be a positive number" + exit 1 +fi + +# Calculate number of elements needed +# Based on empirical testing, each bash array element consumes approximately 200 bytes +# This accounts for the value storage plus bash's internal overhead +# Convert GB to bytes: GB * 1024^3 / 200 +ELEMENTS=$(awk "BEGIN {printf \"%.0f\", $MEMORY_GB * 1024 * 1024 * 1024 / 100}") + +echo "Allocating approximately ${MEMORY_GB} GB of memory (${ELEMENTS} array elements)..." + +# Function to allocate memory +allocate_memory() { + local elements=$1 + local array + + echo "Process $$ starting memory allocation..." + + # Fill array with data (allocate 3 elements per iteration for 3x speed) + for ((i=0; i/dev/null +wait + +echo "Script completed" diff --git a/rust/crates/rqd/src/system/reservation.rs b/rust/crates/rqd/src/system/reservation.rs index a5957ef08..d672ebb5d 100644 --- a/rust/crates/rqd/src/system/reservation.rs +++ b/rust/crates/rqd/src/system/reservation.rs @@ -165,19 +165,20 @@ impl CoreStateManager { } /// Get a list of all cores booked for this phys_id - fn get_bookings(&self, phys_id: &PhysId) -> impl Iterator { - self.bookings.values().flat_map(|booking| { + fn get_bookings(&self, phys_id: &PhysId) -> impl Iterator + '_ { + let phys_id = *phys_id; + self.bookings.values().flat_map(move |booking| { let cores: Vec = booking .cores .iter() - .filter(|&(phys_id_all, _)| *phys_id == *phys_id_all) + .filter(|&(phys_id_all, _)| phys_id == *phys_id_all) .map(|(_, core_id)| *core_id) .collect(); cores }) } - fn calculate_available_cores(&self) -> impl Iterator)> { + fn calculate_available_cores(&self) -> impl Iterator)> + '_ { self.processor_structure .cores_by_phys_id .iter() diff --git a/rust/crates/scheduler/Cargo.toml b/rust/crates/scheduler/Cargo.toml new file mode 100644 index 000000000..3686f5be7 --- /dev/null +++ b/rust/crates/scheduler/Cargo.toml @@ -0,0 +1,67 @@ +[package] +name = "scheduler" 
+authors = { workspace = true } +edition = { workspace = true } +version = { workspace = true } +license = "Apache-2.0" +description = "OpenCue Server Side Job Queueing Service" + +[[bin]] +path = "src/main.rs" +name = "cue-scheduler" + +[profile.bench] +debug = true + +[dependencies] +# Internal Dependencies +opencue-proto = { path = "../opencue-proto" } + +# External Dependencies +actix = "0.13" +chrono = "0.4.38" +futures = { workspace = true } +scc = "3.1" +serde = { version = "1.0", features = ["derive"] } +serde_derive = "1.0" +serde_json = "1.0" +async-trait = { workspace = true } +async-stream = { workspace = true } +config = { workspace = true } +thiserror = { workspace = true } +miette = { workspace = true } +tracing = { workspace = true } +tracing-appender = { workspace = true } +tracing-rolling-file = { workspace = true } +tracing-subscriber = { workspace = true, features = ["time"] } +uuid = { workspace = true, features = ["serde"] } +prost = { workspace = true } +tokio = { workspace = true } +tokio-stream = { workspace = true } +tokio-util = { version = "0.7" , feature = ["sync"]} +tonic = { workspace = true } +itertools = "0.13.0" +humantime = "2.2.0" +humantime-serde = "1.1.1" +sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "chrono"] } +structopt = { workspace = true } +once_cell = "1.13" +bytesize = { version = "1.2.0", features = ["serde"] } +regex = "1.0" +indexmap = "2.0" +lazy_static = "1.5" +moka = { version = "0.12.10", features = ["future"] } +prometheus = "0.13" +axum = "0.7" +tower-http = { version = "0.5", features = ["trace"] } +urlencoding = "2.1" + +[features] +default = [] +smoke-tests = [] + +[dev-dependencies] +tokio-test = "0.4" +tracing-test = "0.2" +serial_test = "3.0" +rand = "0.8" diff --git a/rust/crates/scheduler/resources/schema b/rust/crates/scheduler/resources/schema new file mode 100644 index 000000000..2173eed09 --- /dev/null +++ b/rust/crates/scheduler/resources/schema @@ -0,0 +1,4307 @@ +-- +-- 
PostgreSQL database dump +-- + +-- Dumped from database version 15.1 (Debian 15.1-1.pgdg110+1) +-- Dumped by pg_dump version 15.13 + +-- Started on 2025-09-04 18:51:09 UTC + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- TOC entry 266 (class 1259 OID 16828) +-- Name: action; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.action ( + pk_action character varying(36) NOT NULL, + pk_filter character varying(36) NOT NULL, + pk_folder character varying(36), + str_action character varying(24) NOT NULL, + str_value_type character varying(24) NOT NULL, + str_value character varying(4000), + int_value bigint, + b_value boolean, + ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + float_value numeric(6,2), + b_stop boolean DEFAULT false NOT NULL +); + + +ALTER TABLE public.action OWNER TO cuebot; + +-- +-- TOC entry 265 (class 1259 OID 16821) +-- Name: alloc; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.alloc ( + pk_alloc character varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + b_allow_edit boolean DEFAULT true NOT NULL, + b_default boolean DEFAULT false NOT NULL, + str_tag character varying(24), + b_billable boolean DEFAULT true NOT NULL, + pk_facility character varying(36) NOT NULL, + b_enabled boolean DEFAULT true +); + + +ALTER TABLE public.alloc OWNER TO cuebot; + +-- +-- TOC entry 264 (class 1259 OID 16815) +-- Name: comments; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.comments ( + pk_comment character varying(36) NOT NULL, + pk_job character varying(36), + pk_host 
character varying(36), + ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + str_user character varying(36) NOT NULL, + str_subject character varying(128) NOT NULL, + str_message character varying(4000) NOT NULL +); + + +ALTER TABLE public.comments OWNER TO cuebot; + +-- +-- TOC entry 263 (class 1259 OID 16808) +-- Name: config; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.config ( + pk_config character varying(36) NOT NULL, + str_key character varying(36) NOT NULL, + int_value bigint DEFAULT 0, + long_value bigint DEFAULT 0, + str_value character varying(255) DEFAULT ''::character varying, + b_value boolean DEFAULT false +); + + +ALTER TABLE public.config OWNER TO cuebot; + +-- +-- TOC entry 224 (class 1259 OID 16453) +-- Name: deed; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.deed ( + pk_deed character varying(36) NOT NULL, + pk_owner character varying(36) NOT NULL, + pk_host character varying(36) NOT NULL, + b_blackout boolean DEFAULT false NOT NULL, + int_blackout_start integer, + int_blackout_stop integer +); + + +ALTER TABLE public.deed OWNER TO cuebot; + +-- +-- TOC entry 262 (class 1259 OID 16800) +-- Name: depend; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.depend ( + pk_depend character varying(36) NOT NULL, + pk_parent character varying(36), + pk_job_depend_on character varying(36) NOT NULL, + pk_job_depend_er character varying(36) NOT NULL, + pk_frame_depend_on character varying(36), + pk_frame_depend_er character varying(36), + pk_layer_depend_on character varying(36), + pk_layer_depend_er character varying(36), + str_type character varying(36) NOT NULL, + b_active boolean DEFAULT true NOT NULL, + b_any boolean DEFAULT false NOT NULL, + ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_satisfied timestamp(6) without time zone, + str_target character varying(20) DEFAULT 'Internal'::character varying NOT NULL, + 
str_signature character varying(36) NOT NULL, + b_composite boolean DEFAULT false NOT NULL +); + + +ALTER TABLE public.depend OWNER TO cuebot; + +-- +-- TOC entry 235 (class 1259 OID 16518) +-- Name: dept; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.dept ( + pk_dept character varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + b_default boolean DEFAULT false NOT NULL +); + + +ALTER TABLE public.dept OWNER TO cuebot; + +-- +-- TOC entry 219 (class 1259 OID 16427) +-- Name: duplicate_cursors; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.duplicate_cursors ( + dt_recorded date, + inst_id numeric, + lng_count numeric +); + + +ALTER TABLE public.duplicate_cursors OWNER TO cuebot; + +-- +-- TOC entry 236 (class 1259 OID 16522) +-- Name: facility; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.facility ( + pk_facility character varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + b_default boolean DEFAULT false NOT NULL +); + + +ALTER TABLE public.facility OWNER TO cuebot; + +-- +-- TOC entry 261 (class 1259 OID 16795) +-- Name: filter; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.filter ( + pk_filter character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + str_name character varying(128) NOT NULL, + str_type character varying(16) NOT NULL, + f_order numeric(6,2) DEFAULT 0.0 NOT NULL, + b_enabled boolean DEFAULT true NOT NULL +); + + +ALTER TABLE public.filter OWNER TO cuebot; + +-- +-- TOC entry 215 (class 1259 OID 16385) +-- Name: flyway_schema_history; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.flyway_schema_history ( + installed_rank integer NOT NULL, + version character varying(50), + description character varying(200) NOT NULL, + type character varying(20) NOT NULL, + script character varying(1000) NOT NULL, + checksum integer, + installed_by character varying(100) NOT NULL, + installed_on 
timestamp without time zone DEFAULT now() NOT NULL, + execution_time integer NOT NULL, + success boolean NOT NULL +); + + +ALTER TABLE public.flyway_schema_history OWNER TO cuebot; + +-- +-- TOC entry 260 (class 1259 OID 16783) +-- Name: folder; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.folder ( + pk_folder character varying(36) NOT NULL, + pk_parent_folder character varying(36), + pk_show character varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + int_priority bigint DEFAULT 1 NOT NULL, + b_default boolean DEFAULT false NOT NULL, + pk_dept character varying(36) NOT NULL, + int_job_min_cores integer DEFAULT '-1'::integer NOT NULL, + int_job_max_cores integer DEFAULT '-1'::integer NOT NULL, + int_job_priority integer DEFAULT '-1'::integer NOT NULL, + int_min_cores integer DEFAULT 0 NOT NULL, + int_max_cores integer DEFAULT '-1'::integer NOT NULL, + b_exclude_managed boolean DEFAULT false NOT NULL, + f_order integer DEFAULT 0 NOT NULL, + int_job_min_gpus integer DEFAULT '-1'::integer NOT NULL, + int_job_max_gpus integer DEFAULT '-1'::integer NOT NULL, + int_min_gpus integer DEFAULT 0 NOT NULL, + int_max_gpus integer DEFAULT '-1'::integer NOT NULL +); + + +ALTER TABLE public.folder OWNER TO cuebot; + +-- +-- TOC entry 259 (class 1259 OID 16779) +-- Name: folder_level; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.folder_level ( + pk_folder_level character varying(36) NOT NULL, + pk_folder character varying(36) NOT NULL, + int_level bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.folder_level OWNER TO cuebot; + +-- +-- TOC entry 233 (class 1259 OID 16508) +-- Name: folder_resource; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.folder_resource ( + pk_folder_resource character varying(36) NOT NULL, + pk_folder character varying(36) NOT NULL, + int_cores integer DEFAULT 0 NOT NULL, + int_max_cores integer DEFAULT '-1'::integer NOT NULL, + int_min_cores integer DEFAULT 0 NOT 
NULL, + float_tier numeric(16,2) DEFAULT 0 NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL, + int_max_gpus integer DEFAULT '-1'::integer NOT NULL, + int_min_gpus integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.folder_resource OWNER TO cuebot; + +-- +-- TOC entry 258 (class 1259 OID 16761) +-- Name: frame; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.frame ( + pk_frame character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + str_name character varying(256) NOT NULL, + str_state character varying(24) NOT NULL, + int_number bigint NOT NULL, + int_depend_count bigint DEFAULT 0 NOT NULL, + int_exit_status bigint DEFAULT '-1'::integer NOT NULL, + int_retries bigint DEFAULT 0 NOT NULL, + int_mem_reserved bigint DEFAULT 0 NOT NULL, + int_mem_max_used bigint DEFAULT 0 NOT NULL, + int_mem_used bigint DEFAULT 0 NOT NULL, + int_dispatch_order bigint DEFAULT 0 NOT NULL, + str_host character varying(256), + int_cores integer DEFAULT 0 NOT NULL, + int_layer_order integer NOT NULL, + ts_started timestamp(6) with time zone, + ts_stopped timestamp(6) with time zone, + ts_last_run timestamp(6) with time zone, + ts_updated timestamp(6) with time zone, + int_version integer DEFAULT 0, + str_checkpoint_state character varying(12) DEFAULT 'DISABLED'::character varying NOT NULL, + int_checkpoint_count smallint DEFAULT 0 NOT NULL, + int_gpu_mem_reserved bigint DEFAULT 0 NOT NULL, + int_total_past_core_time integer DEFAULT 0 NOT NULL, + ts_llu timestamp(6) with time zone, + int_gpu_mem_used bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max_used bigint DEFAULT 0 NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL, + int_total_past_gpu_time integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.frame OWNER TO cuebot; + +-- +-- TOC entry 217 (class 1259 OID 16408) +-- Name: frame_history; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.frame_history ( + pk_frame_history character 
varying(36) DEFAULT public.uuid_generate_v1() NOT NULL, + pk_frame character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + str_name character varying(256) NOT NULL, + str_state character varying(24) NOT NULL, + int_mem_reserved bigint DEFAULT 0 NOT NULL, + int_mem_max_used bigint DEFAULT 0 NOT NULL, + int_cores integer DEFAULT 100 NOT NULL, + str_host character varying(64) DEFAULT NULL::character varying, + int_exit_status smallint DEFAULT '-1'::integer NOT NULL, + pk_alloc character varying(36), + int_ts_started integer NOT NULL, + int_ts_stopped integer DEFAULT 0 NOT NULL, + int_checkpoint_count integer DEFAULT 0 NOT NULL, + dt_last_modified date NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL, + int_gpu_mem_reserved bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max_used bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.frame_history OWNER TO cuebot; + +-- +-- TOC entry 4297 (class 0 OID 0) +-- Dependencies: 217 +-- Name: COLUMN frame_history.int_mem_reserved; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.frame_history.int_mem_reserved IS 'kilobytes of memory reserved'; + + +-- +-- TOC entry 4298 (class 0 OID 0) +-- Dependencies: 217 +-- Name: COLUMN frame_history.int_mem_max_used; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.frame_history.int_mem_max_used IS 'maximum kilobytes of rss memory used'; + + +-- +-- TOC entry 4299 (class 0 OID 0) +-- Dependencies: 217 +-- Name: COLUMN frame_history.int_cores; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.frame_history.int_cores IS '100 cores per physical core'; + + +-- +-- TOC entry 282 (class 1259 OID 17964) +-- Name: frame_state_display_overrides; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.frame_state_display_overrides ( + pk_frame_override character varying(36) NOT NULL, + pk_frame character varying(36) NOT NULL, + str_frame_state 
character varying(24) NOT NULL, + str_override_text character varying(24) NOT NULL, + str_rgb character varying(24) NOT NULL +); + + +ALTER TABLE public.frame_state_display_overrides OWNER TO cuebot; + +-- +-- TOC entry 218 (class 1259 OID 16421) +-- Name: history_period; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.history_period ( + pk character varying(36) DEFAULT public.uuid_generate_v1() NOT NULL, + dt_begin date DEFAULT to_date('01-JAN-2000'::text, 'DD-MON-YYYY'::text) NOT NULL, + dt_end date DEFAULT CURRENT_TIMESTAMP NOT NULL +); + + +ALTER TABLE public.history_period OWNER TO cuebot; + +-- +-- TOC entry 216 (class 1259 OID 16405) +-- Name: history_period_bak; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.history_period_bak ( + pk character varying(32), + dt_begin date NOT NULL, + dt_end date NOT NULL +); + + +ALTER TABLE public.history_period_bak OWNER TO cuebot; + +-- +-- TOC entry 257 (class 1259 OID 16742) +-- Name: host; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.host ( + pk_host character varying(36) NOT NULL, + pk_alloc character varying(36) NOT NULL, + str_name character varying(45) NOT NULL, + str_lock_state character varying(36) NOT NULL, + b_nimby boolean DEFAULT false NOT NULL, + ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_last_updated timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + int_cores bigint DEFAULT 0 NOT NULL, + int_procs bigint DEFAULT 0 NOT NULL, + int_cores_idle bigint DEFAULT 0 NOT NULL, + int_mem bigint DEFAULT 0 NOT NULL, + int_mem_idle bigint DEFAULT 0 NOT NULL, + b_unlock_boot boolean DEFAULT false NOT NULL, + b_unlock_idle boolean DEFAULT false NOT NULL, + b_reboot_idle boolean DEFAULT false NOT NULL, + str_tags character varying(128), + str_fqdn character varying(128), + b_comment boolean DEFAULT false NOT NULL, + int_thread_mode integer DEFAULT 0 NOT NULL, + str_lock_source character 
varying(128), + int_gpu_mem bigint DEFAULT 0 NOT NULL, + int_gpu_mem_idle bigint DEFAULT 0 NOT NULL, + int_gpus bigint DEFAULT 0 NOT NULL, + int_gpus_idle bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.host OWNER TO cuebot; + +-- +-- TOC entry 226 (class 1259 OID 16462) +-- Name: host_local; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.host_local ( + pk_host_local character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + pk_layer character varying(36), + pk_frame character varying(36), + pk_host character varying(36) NOT NULL, + ts_created timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_updated timestamp(6) with time zone, + int_mem_max bigint DEFAULT 0 NOT NULL, + int_mem_idle bigint DEFAULT 0 NOT NULL, + int_cores_max integer DEFAULT 100 NOT NULL, + int_cores_idle integer DEFAULT 100 NOT NULL, + int_threads integer DEFAULT 1 NOT NULL, + float_tier numeric(16,2) DEFAULT 0 NOT NULL, + b_active boolean DEFAULT true NOT NULL, + str_type character varying(36) NOT NULL, + int_gpu_mem_idle bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL, + int_gpus_idle integer DEFAULT 0 NOT NULL, + int_gpus_max integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.host_local OWNER TO cuebot; + +-- +-- TOC entry 256 (class 1259 OID 16726) +-- Name: host_stat; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.host_stat ( + pk_host_stat character varying(36) NOT NULL, + pk_host character varying(36) NOT NULL, + int_mem_total bigint DEFAULT 0 NOT NULL, + int_mem_free bigint DEFAULT 0 NOT NULL, + int_swap_total bigint DEFAULT 0 NOT NULL, + int_swap_free bigint DEFAULT 0 NOT NULL, + int_mcp_total bigint DEFAULT 0 NOT NULL, + int_mcp_free bigint DEFAULT 0 NOT NULL, + int_load bigint DEFAULT 0 NOT NULL, + ts_ping timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_booted timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + str_state character 
varying(32) DEFAULT 'UP'::character varying NOT NULL, + str_os character varying(12) DEFAULT 'rhel40'::character varying NOT NULL, + int_gpu_mem_total bigint DEFAULT 0 NOT NULL, + int_gpu_mem_free bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.host_stat OWNER TO cuebot; + +-- +-- TOC entry 241 (class 1259 OID 16572) +-- Name: host_tag; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.host_tag ( + pk_host_tag character varying(36) NOT NULL, + pk_host character varying(36) NOT NULL, + str_tag character varying(45) NOT NULL, + str_tag_type character varying(24) DEFAULT 'Hardware'::character varying NOT NULL, + b_constant boolean DEFAULT false NOT NULL +); + + +ALTER TABLE public.host_tag OWNER TO cuebot; + +-- +-- TOC entry 255 (class 1259 OID 16705) +-- Name: job; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job ( + pk_job character varying(36) NOT NULL, + pk_folder character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + str_name character varying(255) NOT NULL, + str_visible_name character varying(255), + str_shot character varying(64) NOT NULL, + str_user character varying(32) NOT NULL, + str_state character varying(16) NOT NULL, + str_log_dir character varying(4000) DEFAULT ''::character varying NOT NULL, + int_uid bigint, + b_paused boolean DEFAULT false NOT NULL, + b_autoeat boolean DEFAULT false NOT NULL, + int_frame_count integer DEFAULT 0 NOT NULL, + int_layer_count integer DEFAULT 0 NOT NULL, + int_max_retries smallint DEFAULT 3 NOT NULL, + b_auto_book boolean DEFAULT true NOT NULL, + b_auto_unbook boolean DEFAULT true NOT NULL, + b_comment boolean DEFAULT false NOT NULL, + str_email character varying(256), + pk_facility character varying(36) NOT NULL, + pk_dept character varying(36) NOT NULL, + ts_started timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_stopped timestamp(6) with time zone, + int_min_cores integer DEFAULT 100 NOT NULL, + int_max_cores integer DEFAULT 
20000 NOT NULL, + str_show character varying(512) DEFAULT 'none'::character varying NOT NULL, + ts_updated timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + str_os character varying(12), + int_min_gpus integer DEFAULT 0 NOT NULL, + int_max_gpus integer DEFAULT 100000 NOT NULL +); + + +ALTER TABLE public.job OWNER TO cuebot; + +-- +-- TOC entry 254 (class 1259 OID 16700) +-- Name: job_env; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_env ( + pk_job_env character varying(36) NOT NULL, + pk_job character varying(36), + str_key character varying(2048), + str_value character varying(2048) +); + + +ALTER TABLE public.job_env OWNER TO cuebot; + +-- +-- TOC entry 239 (class 1259 OID 16548) +-- Name: job_history; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_history ( + pk_job character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + str_name character varying(512) NOT NULL, + str_shot character varying(64) NOT NULL, + str_user character varying(36) NOT NULL, + int_core_time_success bigint DEFAULT 0 NOT NULL, + int_core_time_fail bigint DEFAULT 0 NOT NULL, + int_frame_count bigint DEFAULT 0 NOT NULL, + int_layer_count bigint DEFAULT 0 NOT NULL, + int_waiting_count bigint DEFAULT 0 NOT NULL, + int_dead_count bigint DEFAULT 0 NOT NULL, + int_depend_count bigint DEFAULT 0 NOT NULL, + int_eaten_count bigint DEFAULT 0 NOT NULL, + int_succeeded_count bigint DEFAULT 0 NOT NULL, + int_running_count bigint DEFAULT 0 NOT NULL, + int_max_rss bigint DEFAULT 0 NOT NULL, + b_archived boolean DEFAULT false NOT NULL, + pk_facility character varying(36) NOT NULL, + pk_dept character varying(36) NOT NULL, + int_ts_started integer NOT NULL, + int_ts_stopped integer DEFAULT 0 NOT NULL, + dt_last_modified date NOT NULL, + int_gpu_time_success bigint DEFAULT 0 NOT NULL, + int_gpu_time_fail bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.job_history OWNER 
TO cuebot; + +-- +-- TOC entry 4300 (class 0 OID 0) +-- Dependencies: 239 +-- Name: COLUMN job_history.int_core_time_success; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.job_history.int_core_time_success IS 'seconds per core succeeded'; + + +-- +-- TOC entry 4301 (class 0 OID 0) +-- Dependencies: 239 +-- Name: COLUMN job_history.int_core_time_fail; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.job_history.int_core_time_fail IS 'seconds per core failed'; + + +-- +-- TOC entry 4302 (class 0 OID 0) +-- Dependencies: 239 +-- Name: COLUMN job_history.int_max_rss; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.job_history.int_max_rss IS 'maximum kilobytes of rss memory used by a single frame'; + + +-- +-- TOC entry 228 (class 1259 OID 16480) +-- Name: job_local; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_local ( + pk_job_local character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + pk_host character varying(36) NOT NULL, + str_source character varying(255) NOT NULL, + ts_created timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + int_cores integer DEFAULT 0 NOT NULL, + int_max_cores integer NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL, + int_max_gpus integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.job_local OWNER TO cuebot; + +-- +-- TOC entry 232 (class 1259 OID 16503) +-- Name: job_mem; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_mem ( + pk_job_mem character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_max_rss bigint DEFAULT 0 NOT NULL, + int_max_vss bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.job_mem OWNER TO cuebot; + +-- +-- TOC entry 237 (class 1259 OID 16526) +-- Name: job_post; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_post ( + pk_job_post character 
varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + pk_post_job character varying(36) NOT NULL +); + + +ALTER TABLE public.job_post OWNER TO cuebot; + +-- +-- TOC entry 243 (class 1259 OID 16587) +-- Name: job_resource; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_resource ( + pk_job_resource character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_cores bigint DEFAULT 0 NOT NULL, + int_max_rss bigint DEFAULT 0 NOT NULL, + int_max_vss bigint DEFAULT 0 NOT NULL, + int_min_cores integer DEFAULT 100 NOT NULL, + int_max_cores integer DEFAULT 10000 NOT NULL, + float_tier numeric(16,2) DEFAULT 0 NOT NULL, + int_priority integer DEFAULT 1 NOT NULL, + int_local_cores integer DEFAULT 0 NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL, + int_min_gpus integer DEFAULT 0 NOT NULL, + int_max_gpus integer DEFAULT 100 NOT NULL, + int_local_gpus integer DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.job_resource OWNER TO cuebot; + +-- +-- TOC entry 244 (class 1259 OID 16598) +-- Name: job_stat; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_stat ( + pk_job_stat character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_waiting_count bigint DEFAULT 0 NOT NULL, + int_running_count bigint DEFAULT 0 NOT NULL, + int_dead_count bigint DEFAULT 0 NOT NULL, + int_depend_count bigint DEFAULT 0 NOT NULL, + int_eaten_count bigint DEFAULT 0 NOT NULL, + int_succeeded_count bigint DEFAULT 0 NOT NULL, + int_checkpoint_count bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.job_stat OWNER TO cuebot; + +-- +-- TOC entry 242 (class 1259 OID 16577) +-- Name: job_usage; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.job_usage ( + pk_job_usage character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_core_time_success bigint DEFAULT 0 NOT NULL, + int_core_time_fail bigint DEFAULT 0 NOT NULL, + 
int_frame_success_count integer DEFAULT 0 NOT NULL, + int_frame_fail_count integer DEFAULT 0 NOT NULL, + int_clock_time_fail integer DEFAULT 0 NOT NULL, + int_clock_time_high integer DEFAULT 0 NOT NULL, + int_clock_time_success integer DEFAULT 0 NOT NULL, + int_gpu_time_success bigint DEFAULT 0 NOT NULL, + int_gpu_time_fail bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.job_usage OWNER TO cuebot; + +-- +-- TOC entry 253 (class 1259 OID 16685) +-- Name: layer; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer ( + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + str_name character varying(256) NOT NULL, + str_cmd character varying(4000) NOT NULL, + str_range character varying(4000) NOT NULL, + int_chunk_size bigint DEFAULT 1 NOT NULL, + int_dispatch_order bigint DEFAULT 1 NOT NULL, + int_cores_min bigint DEFAULT 100 NOT NULL, + int_mem_min bigint DEFAULT 4194304 NOT NULL, + str_tags character varying(4000) DEFAULT ''::character varying NOT NULL, + str_type character varying(16) NOT NULL, + b_threadable boolean DEFAULT true NOT NULL, + str_services character varying(128) DEFAULT 'default'::character varying NOT NULL, + b_optimize boolean DEFAULT true NOT NULL, + int_cores_max integer DEFAULT 0 NOT NULL, + int_gpu_mem_min bigint DEFAULT 0 NOT NULL, + int_timeout integer DEFAULT 0 NOT NULL, + int_timeout_llu integer DEFAULT 0 NOT NULL, + int_gpus_min bigint DEFAULT 0 NOT NULL, + int_gpus_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.layer OWNER TO cuebot; + +-- +-- TOC entry 252 (class 1259 OID 16680) +-- Name: layer_env; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_env ( + pk_layer_env character varying(36) NOT NULL, + pk_layer character varying(36), + pk_job character varying(36), + str_key character varying(2048), + str_value character varying(2048) +); + + +ALTER TABLE public.layer_env OWNER TO cuebot; + +-- +-- TOC entry 238 (class 1259 OID 16529) +-- 
Name: layer_history; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_history ( + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + str_name character varying(512) NOT NULL, + str_type character varying(16) NOT NULL, + int_cores_min bigint DEFAULT 100 NOT NULL, + int_mem_min bigint DEFAULT 4194304 NOT NULL, + int_core_time_success bigint DEFAULT 0 NOT NULL, + int_core_time_fail bigint DEFAULT 0 NOT NULL, + int_frame_count bigint DEFAULT 0 NOT NULL, + int_layer_count bigint DEFAULT 0 NOT NULL, + int_waiting_count bigint DEFAULT 0 NOT NULL, + int_dead_count bigint DEFAULT 0 NOT NULL, + int_depend_count bigint DEFAULT 0 NOT NULL, + int_eaten_count bigint DEFAULT 0 NOT NULL, + int_succeeded_count bigint DEFAULT 0 NOT NULL, + int_running_count bigint DEFAULT 0 NOT NULL, + int_max_rss bigint DEFAULT 0 NOT NULL, + b_archived boolean DEFAULT false NOT NULL, + dt_last_modified date NOT NULL, + str_services character varying(128), + int_gpus_min integer DEFAULT 0 NOT NULL, + int_gpu_time_success bigint DEFAULT 0 NOT NULL, + int_gpu_time_fail bigint DEFAULT 0 NOT NULL, + int_gpu_mem_min bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.layer_history OWNER TO cuebot; + +-- +-- TOC entry 4303 (class 0 OID 0) +-- Dependencies: 238 +-- Name: COLUMN layer_history.int_core_time_success; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.layer_history.int_core_time_success IS 'seconds per core succeeded'; + + +-- +-- TOC entry 4304 (class 0 OID 0) +-- Dependencies: 238 +-- Name: COLUMN layer_history.int_core_time_fail; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN public.layer_history.int_core_time_fail IS 'seconds per core failed'; + + +-- +-- TOC entry 4305 (class 0 OID 0) +-- Dependencies: 238 +-- Name: COLUMN layer_history.int_max_rss; Type: COMMENT; Schema: public; Owner: cuebot +-- + +COMMENT ON COLUMN 
public.layer_history.int_max_rss IS 'maximum kilobytes of rss memory used by a single frame'; + + +-- +-- TOC entry 273 (class 1259 OID 17546) +-- Name: layer_limit; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_limit ( + pk_layer_limit character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_limit_record character varying(36) NOT NULL +); + + +ALTER TABLE public.layer_limit OWNER TO cuebot; + +-- +-- TOC entry 231 (class 1259 OID 16498) +-- Name: layer_mem; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_mem ( + pk_layer_mem character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + int_max_rss bigint DEFAULT 0 NOT NULL, + int_max_vss bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.layer_mem OWNER TO cuebot; + +-- +-- TOC entry 222 (class 1259 OID 16443) +-- Name: layer_output; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_output ( + pk_layer_output character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + str_filespec character varying(2048) NOT NULL, + ser_order integer NOT NULL +); + + +ALTER TABLE public.layer_output OWNER TO cuebot; + +-- +-- TOC entry 283 (class 1259 OID 17975) +-- Name: layer_output_ser_order_seq; Type: SEQUENCE; Schema: public; Owner: cuebot +-- + +CREATE SEQUENCE public.layer_output_ser_order_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER TABLE public.layer_output_ser_order_seq OWNER TO cuebot; + +-- +-- TOC entry 4306 (class 0 OID 0) +-- Dependencies: 283 +-- Name: layer_output_ser_order_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: cuebot +-- + +ALTER SEQUENCE public.layer_output_ser_order_seq OWNED BY public.layer_output.ser_order; + + +-- +-- TOC entry 251 (class 1259 OID 16674) +-- Name: 
layer_resource; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_resource ( + pk_layer_resource character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_cores bigint DEFAULT 0 NOT NULL, + int_max_rss bigint DEFAULT 0 NOT NULL, + int_max_vss bigint DEFAULT 0 NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL, + int_gpu_mem_max bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.layer_resource OWNER TO cuebot; + +-- +-- TOC entry 250 (class 1259 OID 16663) +-- Name: layer_stat; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_stat ( + pk_layer_stat character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_total_count bigint DEFAULT 0 NOT NULL, + int_waiting_count bigint DEFAULT 0 NOT NULL, + int_running_count bigint DEFAULT 0 NOT NULL, + int_dead_count bigint DEFAULT 0 NOT NULL, + int_depend_count bigint DEFAULT 0 NOT NULL, + int_eaten_count bigint DEFAULT 0 NOT NULL, + int_succeeded_count bigint DEFAULT 0 NOT NULL, + int_checkpoint_count bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.layer_stat OWNER TO cuebot; + +-- +-- TOC entry 249 (class 1259 OID 16652) +-- Name: layer_usage; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.layer_usage ( + pk_layer_usage character varying(36) NOT NULL, + pk_layer character varying(36) NOT NULL, + pk_job character varying(36) NOT NULL, + int_core_time_success bigint DEFAULT 0 NOT NULL, + int_core_time_fail bigint DEFAULT 0 NOT NULL, + int_frame_success_count integer DEFAULT 0 NOT NULL, + int_frame_fail_count integer DEFAULT 0 NOT NULL, + int_clock_time_fail integer DEFAULT 0 NOT NULL, + int_clock_time_high integer DEFAULT 0 NOT NULL, + int_clock_time_low integer DEFAULT 0 NOT NULL, + int_clock_time_success integer DEFAULT 0 NOT NULL, + int_gpu_time_success bigint DEFAULT 0 NOT NULL, + int_gpu_time_fail bigint DEFAULT 0 NOT 
NULL +); + + +ALTER TABLE public.layer_usage OWNER TO cuebot; + +-- +-- TOC entry 272 (class 1259 OID 17542) +-- Name: limit_record; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.limit_record ( + pk_limit_record character varying(36) NOT NULL, + str_name character varying(255) NOT NULL, + int_max_value integer, + b_host_limit boolean DEFAULT false NOT NULL +); + + +ALTER TABLE public.limit_record OWNER TO cuebot; + +-- +-- TOC entry 248 (class 1259 OID 16646) +-- Name: matcher; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.matcher ( + pk_matcher character varying(36) NOT NULL, + pk_filter character varying(36) NOT NULL, + str_subject character varying(64) NOT NULL, + str_match character varying(64) NOT NULL, + str_value character varying(6000) NOT NULL, + ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL +); + + +ALTER TABLE public.matcher OWNER TO cuebot; + +-- +-- TOC entry 225 (class 1259 OID 16457) +-- Name: owner; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.owner ( + pk_owner character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + str_username character varying(64) NOT NULL, + ts_created timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_updated timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL +); + + +ALTER TABLE public.owner OWNER TO cuebot; + +-- +-- TOC entry 230 (class 1259 OID 16490) +-- Name: point; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.point ( + pk_point character varying(36) NOT NULL, + pk_dept character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + str_ti_task character varying(36), + int_cores integer DEFAULT 0 NOT NULL, + b_managed boolean DEFAULT false NOT NULL, + int_min_cores integer DEFAULT 0 NOT NULL, + float_tier numeric(16,2) DEFAULT 0 NOT NULL, + ts_updated timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + int_gpus integer 
DEFAULT 0 NOT NULL, + int_min_gpus integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.point OWNER TO cuebot; + +-- +-- TOC entry 247 (class 1259 OID 16630) +-- Name: proc; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.proc ( + pk_proc character varying(36) NOT NULL, + pk_host character varying(36) NOT NULL, + pk_job character varying(36), + pk_show character varying(36), + pk_layer character varying(36), + pk_frame character varying(36), + int_cores_reserved bigint NOT NULL, + int_mem_reserved bigint NOT NULL, + int_mem_used bigint DEFAULT 0 NOT NULL, + int_mem_max_used bigint DEFAULT 0 NOT NULL, + b_unbooked boolean DEFAULT false NOT NULL, + int_mem_pre_reserved bigint DEFAULT 0 NOT NULL, + int_virt_used bigint DEFAULT 0 NOT NULL, + int_virt_max_used bigint DEFAULT 0 NOT NULL, + str_redirect character varying(265), + b_local boolean DEFAULT false NOT NULL, + ts_ping timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_booked timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + ts_dispatched timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + int_gpu_mem_reserved bigint DEFAULT 0 NOT NULL, + int_gpus_reserved integer DEFAULT 0 NOT NULL, + int_gpu_mem_used bigint DEFAULT 0 NOT NULL, + int_gpu_mem_max_used bigint DEFAULT 0 NOT NULL, + int_gpu_mem_pre_reserved bigint DEFAULT 0 NOT NULL, + bytea_children bytea, + int_swap_used bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.proc OWNER TO cuebot; + +-- +-- TOC entry 267 (class 1259 OID 16835) +-- Name: redirect; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.redirect ( + pk_proc character varying(36) NOT NULL, + str_group_id character varying(36) NOT NULL, + int_type bigint NOT NULL, + str_destination_id character varying(512) NOT NULL, + str_name character varying(512) NOT NULL, + lng_creation_time bigint NOT NULL +); + + +ALTER TABLE public.redirect OWNER TO cuebot; + +-- +-- TOC entry 227 (class 1259 OID 16475) +-- 
Name: service; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.service ( + pk_service character varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + b_threadable boolean NOT NULL, + int_cores_min integer NOT NULL, + int_mem_min integer NOT NULL, + str_tags character varying(128) NOT NULL, + int_cores_max integer DEFAULT 0 NOT NULL, + int_gpu_mem_min bigint DEFAULT 0 NOT NULL, + int_timeout integer DEFAULT 0 NOT NULL, + int_timeout_llu integer DEFAULT 0 NOT NULL, + int_gpus_min integer DEFAULT 0 NOT NULL, + int_gpus_max integer DEFAULT 0 NOT NULL, + int_min_memory_increase integer DEFAULT 2097152 NOT NULL +); + + +ALTER TABLE public.service OWNER TO cuebot; + +-- +-- TOC entry 246 (class 1259 OID 16615) +-- Name: show; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.show ( + pk_show character varying(36) NOT NULL, + str_name character varying(512) NOT NULL, + b_paused boolean DEFAULT false NOT NULL, + int_default_min_cores integer DEFAULT 100 NOT NULL, + int_default_max_cores integer DEFAULT 10000 NOT NULL, + b_booking_enabled boolean DEFAULT true NOT NULL, + b_dispatch_enabled boolean DEFAULT true NOT NULL, + b_active boolean DEFAULT true NOT NULL, + str_comment_email character varying(1024), + int_default_min_gpus integer DEFAULT 100 NOT NULL, + int_default_max_gpus integer DEFAULT 100000 NOT NULL +); + + +ALTER TABLE public.show OWNER TO cuebot; + +-- +-- TOC entry 234 (class 1259 OID 16515) +-- Name: show_alias; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.show_alias ( + pk_show_alias character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + str_name character varying(16) NOT NULL +); + + +ALTER TABLE public.show_alias OWNER TO cuebot; + +-- +-- TOC entry 223 (class 1259 OID 16448) +-- Name: show_service; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.show_service ( + pk_show_service character varying(36) NOT NULL, + pk_show character 
varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + b_threadable boolean NOT NULL, + int_cores_min integer NOT NULL, + int_mem_min integer NOT NULL, + str_tags character varying(128) NOT NULL, + int_cores_max integer DEFAULT 0 NOT NULL, + int_gpu_mem_min bigint DEFAULT 0 NOT NULL, + int_timeout integer DEFAULT 0 NOT NULL, + int_timeout_llu integer DEFAULT 0 NOT NULL, + int_gpus_min integer DEFAULT 0 NOT NULL, + int_gpus_max integer DEFAULT 0 NOT NULL, + int_min_memory_increase integer DEFAULT 2097152 NOT NULL +); + + +ALTER TABLE public.show_service OWNER TO cuebot; + +-- +-- TOC entry 281 (class 1259 OID 17947) +-- Name: show_stats; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.show_stats ( + pk_show character varying(36) NOT NULL, + int_frame_insert_count bigint DEFAULT 0 NOT NULL, + int_job_insert_count bigint DEFAULT 0 NOT NULL, + int_frame_success_count bigint DEFAULT 0 NOT NULL, + int_frame_fail_count bigint DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.show_stats OWNER TO cuebot; + +-- +-- TOC entry 245 (class 1259 OID 16608) +-- Name: subscription; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.subscription ( + pk_subscription character varying(36) NOT NULL, + pk_alloc character varying(36) NOT NULL, + pk_show character varying(36) NOT NULL, + int_size bigint DEFAULT 0 NOT NULL, + int_burst bigint DEFAULT 0 NOT NULL, + int_cores integer DEFAULT 0 NOT NULL, + float_tier numeric(16,2) DEFAULT 0 NOT NULL, + int_gpus integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.subscription OWNER TO cuebot; + +-- +-- TOC entry 229 (class 1259 OID 16485) +-- Name: task; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.task ( + pk_task character varying(36) NOT NULL, + pk_point character varying(36) NOT NULL, + str_shot character varying(36) NOT NULL, + int_min_cores integer DEFAULT 100 NOT NULL, + int_adjust_cores integer DEFAULT 0 NOT NULL, + int_min_gpus integer DEFAULT 0 NOT NULL, + 
int_adjust_gpus integer DEFAULT 0 NOT NULL +); + + +ALTER TABLE public.task OWNER TO cuebot; + +-- +-- TOC entry 240 (class 1259 OID 16566) +-- Name: task_lock; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.task_lock ( + pk_task_lock character varying(36) NOT NULL, + str_name character varying(36) NOT NULL, + int_lock bigint DEFAULT 0 NOT NULL, + int_timeout bigint DEFAULT 30 NOT NULL, + ts_lastrun timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL +); + + +ALTER TABLE public.task_lock OWNER TO cuebot; + +-- +-- TOC entry 221 (class 1259 OID 16437) +-- Name: uncommitted_transactions; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.uncommitted_transactions ( + inst_id numeric, + sid numeric, + serial numeric, + username character varying(30), + machine character varying(64), + module character varying(48), + service_name character varying(64), + duration numeric, + dt_recorded date DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.uncommitted_transactions OWNER TO cuebot; + +-- +-- TOC entry 220 (class 1259 OID 16432) +-- Name: uncommitted_transactions_bak; Type: TABLE; Schema: public; Owner: cuebot +-- + +CREATE TABLE public.uncommitted_transactions_bak ( + inst_id numeric, + sid numeric, + serial numeric, + username character varying(30), + machine character varying(64), + module character varying(48), + service_name character varying(64), + duration numeric, + dt_recorded date +); + + +ALTER TABLE public.uncommitted_transactions_bak OWNER TO cuebot; + +-- +-- TOC entry 278 (class 1259 OID 17924) +-- Name: v_history_frame; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.v_history_frame AS + SELECT fh.pk_frame_history, + fh.pk_frame, + fh.pk_layer, + fh.pk_job, + fh.str_name, + fh.str_state, + fh.int_mem_reserved, + fh.int_mem_max_used, + fh.int_cores, + fh.int_gpu_mem_reserved, + fh.int_gpu_mem_max_used, + fh.int_gpus, + fh.str_host, + fh.int_exit_status, + a.str_name AS 
str_alloc_name, + a.b_billable AS b_alloc_billable, + f.str_name AS str_facility_name, + fh.int_ts_started, + fh.int_ts_stopped, + fh.int_checkpoint_count, + NULL::text AS str_show_name, + fh.dt_last_modified + FROM (((public.frame_history fh + JOIN public.job_history jh ON (((fh.pk_job)::text = (jh.pk_job)::text))) + LEFT JOIN public.alloc a ON (((fh.pk_alloc)::text = (a.pk_alloc)::text))) + LEFT JOIN public.facility f ON (((a.pk_facility)::text = (f.pk_facility)::text))) + WHERE ((fh.dt_last_modified >= ( SELECT history_period.dt_begin + FROM public.history_period)) AND (fh.dt_last_modified < ( SELECT history_period.dt_end + FROM public.history_period))); + + +ALTER TABLE public.v_history_frame OWNER TO cuebot; + +-- +-- TOC entry 279 (class 1259 OID 17929) +-- Name: v_history_job; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.v_history_job AS + SELECT jh.pk_job, + jh.str_name, + jh.str_shot, + jh.str_user, + jh.int_core_time_success, + jh.int_core_time_fail, + jh.int_gpu_time_success, + jh.int_gpu_time_fail, + jh.int_frame_count, + jh.int_layer_count, + jh.int_waiting_count, + jh.int_dead_count, + jh.int_depend_count, + jh.int_eaten_count, + jh.int_succeeded_count, + jh.int_running_count, + jh.int_max_rss, + jh.int_gpu_mem_max, + jh.b_archived, + f.str_name AS str_facility_name, + d.str_name AS str_dept_name, + jh.int_ts_started, + jh.int_ts_stopped, + s.str_name AS str_show_name, + jh.dt_last_modified + FROM public.job_history jh, + public.show s, + public.facility f, + public.dept d + WHERE (((jh.pk_show)::text = (s.pk_show)::text) AND ((jh.pk_facility)::text = (f.pk_facility)::text) AND ((jh.pk_dept)::text = (d.pk_dept)::text) AND ((jh.dt_last_modified >= ( SELECT history_period.dt_begin + FROM public.history_period)) OR (jh.int_ts_stopped = 0))); + + +ALTER TABLE public.v_history_job OWNER TO cuebot; + +-- +-- TOC entry 280 (class 1259 OID 17934) +-- Name: v_history_layer; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW 
public.v_history_layer AS + SELECT lh.pk_layer, + lh.pk_job, + lh.str_name, + lh.str_type, + lh.int_cores_min, + lh.int_mem_min, + lh.int_gpus_min, + lh.int_gpu_mem_min, + lh.int_core_time_success, + lh.int_core_time_fail, + lh.int_gpu_time_success, + lh.int_gpu_time_fail, + lh.int_frame_count, + lh.int_layer_count, + lh.int_waiting_count, + lh.int_dead_count, + lh.int_depend_count, + lh.int_eaten_count, + lh.int_succeeded_count, + lh.int_running_count, + lh.int_max_rss, + lh.int_gpu_mem_max, + lh.b_archived, + lh.str_services, + s.str_name AS str_show_name, + lh.dt_last_modified + FROM public.layer_history lh, + public.job_history jh, + public.show s + WHERE (((lh.pk_job)::text = (jh.pk_job)::text) AND ((jh.pk_show)::text = (s.pk_show)::text) AND (jh.dt_last_modified >= ( SELECT history_period.dt_begin + FROM public.history_period)) AND (jh.dt_last_modified < ( SELECT history_period.dt_end + FROM public.history_period))); + + +ALTER TABLE public.v_history_layer OWNER TO cuebot; + +-- +-- TOC entry 276 (class 1259 OID 17914) +-- Name: vs_alloc_usage; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.vs_alloc_usage AS + SELECT alloc.pk_alloc, + COALESCE(sum(host.int_cores), (0)::numeric) AS int_cores, + COALESCE(sum(host.int_cores_idle), (0)::numeric) AS int_idle_cores, + COALESCE(sum((host.int_cores - host.int_cores_idle)), (0)::numeric) AS int_running_cores, + COALESCE(( SELECT sum(host_1.int_cores) AS sum + FROM public.host host_1 + WHERE (((host_1.pk_alloc)::text = (alloc.pk_alloc)::text) AND (((host_1.str_lock_state)::text = 'NIMBY_LOCKED'::text) OR ((host_1.str_lock_state)::text = 'LOCKED'::text)))), (0)::numeric) AS int_locked_cores, + COALESCE(( SELECT sum(h.int_cores_idle) AS sum + FROM public.host h, + public.host_stat hs + WHERE (((h.pk_host)::text = (hs.pk_host)::text) AND ((h.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((h.str_lock_state)::text = 'OPEN'::text) AND ((hs.str_state)::text = 'UP'::text))), (0)::numeric) AS 
int_available_cores, + COALESCE(sum(host.int_gpus), (0)::numeric) AS int_gpus, + COALESCE(sum(host.int_gpus_idle), (0)::numeric) AS int_idle_gpus, + COALESCE(sum((host.int_gpus - host.int_gpus_idle)), (0)::numeric) AS int_running_gpus, + COALESCE(( SELECT sum(host_1.int_gpus) AS sum + FROM public.host host_1 + WHERE (((host_1.pk_alloc)::text = (alloc.pk_alloc)::text) AND (((host_1.str_lock_state)::text = 'NIMBY_LOCKED'::text) OR ((host_1.str_lock_state)::text = 'LOCKED'::text)))), (0)::numeric) AS int_locked_gpus, + COALESCE(( SELECT sum(h.int_gpus_idle) AS sum + FROM public.host h, + public.host_stat hs + WHERE (((h.pk_host)::text = (hs.pk_host)::text) AND ((h.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((h.str_lock_state)::text = 'OPEN'::text) AND ((hs.str_state)::text = 'UP'::text))), (0)::numeric) AS int_available_gpus, + count(host.pk_host) AS int_hosts, + ( SELECT count(*) AS count + FROM public.host host_1 + WHERE (((host_1.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((host_1.str_lock_state)::text = 'LOCKED'::text))) AS int_locked_hosts, + ( SELECT count(*) AS count + FROM public.host h, + public.host_stat hs + WHERE (((h.pk_host)::text = (hs.pk_host)::text) AND ((h.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((hs.str_state)::text = 'DOWN'::text))) AS int_down_hosts + FROM (public.alloc + LEFT JOIN public.host ON (((alloc.pk_alloc)::text = (host.pk_alloc)::text))) + GROUP BY alloc.pk_alloc; + + +ALTER TABLE public.vs_alloc_usage OWNER TO cuebot; + +-- +-- TOC entry 277 (class 1259 OID 17919) +-- Name: vs_folder_counts; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.vs_folder_counts AS + SELECT folder.pk_folder, + COALESCE(sum(job_stat.int_depend_count), (0)::numeric) AS int_depend_count, + COALESCE(sum(job_stat.int_waiting_count), (0)::numeric) AS int_waiting_count, + COALESCE(sum(job_stat.int_running_count), (0)::numeric) AS int_running_count, + COALESCE(sum(job_stat.int_dead_count), (0)::numeric) AS int_dead_count, + 
COALESCE(sum(job_resource.int_cores), (0)::numeric) AS int_cores, + COALESCE(sum(job_resource.int_gpus), (0)::bigint) AS int_gpus, + COALESCE(count(job.pk_job), (0)::bigint) AS int_job_count + FROM (((public.folder + LEFT JOIN public.job ON ((((folder.pk_folder)::text = (job.pk_folder)::text) AND ((job.str_state)::text = 'PENDING'::text)))) + LEFT JOIN public.job_stat ON (((job.pk_job)::text = (job_stat.pk_job)::text))) + LEFT JOIN public.job_resource ON (((job.pk_job)::text = (job_resource.pk_job)::text))) + GROUP BY folder.pk_folder; + + +ALTER TABLE public.vs_folder_counts OWNER TO cuebot; + +-- +-- TOC entry 275 (class 1259 OID 17909) +-- Name: vs_job_resource; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.vs_job_resource AS + SELECT job.pk_job, + count(proc.pk_proc) AS int_procs, + COALESCE(sum(proc.int_cores_reserved), (0)::numeric) AS int_cores, + COALESCE(sum(proc.int_gpus_reserved), (0)::bigint) AS int_gpus, + COALESCE(sum(proc.int_mem_reserved), (0)::numeric) AS int_mem_reserved + FROM (public.job + LEFT JOIN public.proc ON (((proc.pk_job)::text = (job.pk_job)::text))) + GROUP BY job.pk_job; + + +ALTER TABLE public.vs_job_resource OWNER TO cuebot; + +-- +-- TOC entry 274 (class 1259 OID 17904) +-- Name: vs_show_resource; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.vs_show_resource AS + SELECT job.pk_show, + sum(job_resource.int_cores) AS int_cores, + sum(job_resource.int_gpus) AS int_gpus + FROM public.job, + public.job_resource + WHERE (((job.pk_job)::text = (job_resource.pk_job)::text) AND ((job.str_state)::text = 'PENDING'::text)) + GROUP BY job.pk_show; + + +ALTER TABLE public.vs_show_resource OWNER TO cuebot; + +-- +-- TOC entry 268 (class 1259 OID 17122) +-- Name: vs_show_stat; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.vs_show_stat AS + SELECT job.pk_show, + sum((job_stat.int_waiting_count + job_stat.int_depend_count)) AS int_pending_count, + sum(job_stat.int_running_count) AS 
int_running_count, + sum(job_stat.int_dead_count) AS int_dead_count, + count(1) AS int_job_count + FROM public.job_stat, + public.job + WHERE (((job_stat.pk_job)::text = (job.pk_job)::text) AND ((job.str_state)::text = 'PENDING'::text)) + GROUP BY job.pk_show; + + +ALTER TABLE public.vs_show_stat OWNER TO cuebot; + +-- +-- TOC entry 269 (class 1259 OID 17142) +-- Name: vs_waiting; Type: VIEW; Schema: public; Owner: cuebot +-- + +CREATE VIEW public.vs_waiting AS + SELECT job.pk_show + FROM public.job_resource jr, + public.job_stat, + public.job + WHERE (((job_stat.pk_job)::text = (job.pk_job)::text) AND ((jr.pk_job)::text = (job.pk_job)::text) AND ((job.str_state)::text = 'PENDING'::text) AND (job.b_paused = false) AND ((jr.int_max_cores - jr.int_cores) >= 100) AND (job_stat.int_waiting_count <> 0)) + GROUP BY job.pk_show; + + +ALTER TABLE public.vs_waiting OWNER TO cuebot; + +-- +-- TOC entry 3457 (class 2604 OID 17976) +-- Name: layer_output ser_order; Type: DEFAULT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_output ALTER COLUMN ser_order SET DEFAULT nextval('public.layer_output_ser_order_seq'::regclass); + + +-- +-- TOC entry 4031 (class 2606 OID 17052) +-- Name: action c_action_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.action + ADD CONSTRAINT c_action_pk PRIMARY KEY (pk_action); + + +-- +-- TOC entry 4026 (class 2606 OID 17053) +-- Name: alloc c_alloc_name_uniq; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.alloc + ADD CONSTRAINT c_alloc_name_uniq UNIQUE (str_name); + + +-- +-- TOC entry 4028 (class 2606 OID 17054) +-- Name: alloc c_alloc_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.alloc + ADD CONSTRAINT c_alloc_pk PRIMARY KEY (pk_alloc); + + +-- +-- TOC entry 4022 (class 2606 OID 17055) +-- Name: comments c_comment_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.comments + ADD CONSTRAINT 
c_comment_pk PRIMARY KEY (pk_comment); + + +-- +-- TOC entry 4005 (class 2606 OID 17058) +-- Name: depend c_depend_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.depend + ADD CONSTRAINT c_depend_pk PRIMARY KEY (pk_depend); + + +-- +-- TOC entry 3835 (class 2606 OID 17106) +-- Name: dept c_dept_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.dept + ADD CONSTRAINT c_dept_pk PRIMARY KEY (pk_dept); + + +-- +-- TOC entry 3837 (class 2606 OID 17105) +-- Name: facility c_facility_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.facility + ADD CONSTRAINT c_facility_pk PRIMARY KEY (pk_facility); + + +-- +-- TOC entry 4002 (class 2606 OID 17059) +-- Name: filter c_filter_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.filter + ADD CONSTRAINT c_filter_pk PRIMARY KEY (pk_filter); + + +-- +-- TOC entry 3991 (class 2606 OID 17062) +-- Name: folder_level c_folder_level_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder_level + ADD CONSTRAINT c_folder_level_pk PRIMARY KEY (pk_folder_level); + + +-- +-- TOC entry 3993 (class 2606 OID 17063) +-- Name: folder_level c_folder_level_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder_level + ADD CONSTRAINT c_folder_level_uk UNIQUE (pk_folder); + + +-- +-- TOC entry 3995 (class 2606 OID 17061) +-- Name: folder c_folder_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder + ADD CONSTRAINT c_folder_pk PRIMARY KEY (pk_folder); + + +-- +-- TOC entry 3825 (class 2606 OID 17108) +-- Name: folder_resource c_folder_resource_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder_resource + ADD CONSTRAINT c_folder_resource_pk PRIMARY KEY (pk_folder_resource); + + +-- +-- TOC entry 3997 (class 2606 OID 17060) +-- Name: folder c_folder_uk; Type: CONSTRAINT; Schema: 
public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder + ADD CONSTRAINT c_folder_uk UNIQUE (pk_parent_folder, str_name); + + +-- +-- TOC entry 3756 (class 2606 OID 17116) +-- Name: frame_history c_frame_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame_history + ADD CONSTRAINT c_frame_history_pk PRIMARY KEY (pk_frame_history); + + +-- +-- TOC entry 3982 (class 2606 OID 17064) +-- Name: frame c_frame_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame + ADD CONSTRAINT c_frame_pk PRIMARY KEY (pk_frame); + + +-- +-- TOC entry 4044 (class 2606 OID 17968) +-- Name: frame_state_display_overrides c_frame_state_override; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame_state_display_overrides + ADD CONSTRAINT c_frame_state_override UNIQUE (pk_frame, str_frame_state); + + +-- +-- TOC entry 3984 (class 2606 OID 17065) +-- Name: frame c_frame_str_name_unq; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame + ADD CONSTRAINT c_frame_str_name_unq UNIQUE (str_name, pk_job); + + +-- +-- TOC entry 3766 (class 2606 OID 17115) +-- Name: history_period c_history_period_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.history_period + ADD CONSTRAINT c_history_period_pk PRIMARY KEY (pk); + + +-- +-- TOC entry 3967 (class 2606 OID 17067) +-- Name: host c_host_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host + ADD CONSTRAINT c_host_pk PRIMARY KEY (pk_host); + + +-- +-- TOC entry 3960 (class 2606 OID 17070) +-- Name: host_stat c_host_stat_pk_host_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host_stat + ADD CONSTRAINT c_host_stat_pk_host_uk UNIQUE (pk_host); + + +-- +-- TOC entry 3861 (class 2606 OID 17100) +-- Name: host_tag c_host_tag_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY 
public.host_tag + ADD CONSTRAINT c_host_tag_pk PRIMARY KEY (pk_host_tag); + + +-- +-- TOC entry 3969 (class 2606 OID 17574) +-- Name: host c_host_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host + ADD CONSTRAINT c_host_uk UNIQUE (str_name); + + +-- +-- TOC entry 3962 (class 2606 OID 17069) +-- Name: host_stat c_hoststat_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host_stat + ADD CONSTRAINT c_hoststat_pk PRIMARY KEY (pk_host_stat); + + +-- +-- TOC entry 3942 (class 2606 OID 17073) +-- Name: job_env c_job_env_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_env + ADD CONSTRAINT c_job_env_pk PRIMARY KEY (pk_job_env); + + +-- +-- TOC entry 3849 (class 2606 OID 17102) +-- Name: job_history c_job_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_history + ADD CONSTRAINT c_job_history_pk PRIMARY KEY (pk_job); + + +-- +-- TOC entry 3821 (class 2606 OID 17109) +-- Name: job_mem c_job_mem_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_mem + ADD CONSTRAINT c_job_mem_pk PRIMARY KEY (pk_job_mem); + + +-- +-- TOC entry 3945 (class 2606 OID 17071) +-- Name: job c_job_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job + ADD CONSTRAINT c_job_pk PRIMARY KEY (pk_job); + + +-- +-- TOC entry 3839 (class 2606 OID 17104) +-- Name: job_post c_job_post_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_post + ADD CONSTRAINT c_job_post_pk PRIMARY KEY (pk_job_post); + + +-- +-- TOC entry 3869 (class 2606 OID 17096) +-- Name: job_resource c_job_resource_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_resource + ADD CONSTRAINT c_job_resource_pk PRIMARY KEY (pk_job_resource); + + +-- +-- TOC entry 3871 (class 2606 OID 17097) +-- Name: job_resource c_job_resource_uk; Type: CONSTRAINT; 
Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_resource + ADD CONSTRAINT c_job_resource_uk UNIQUE (pk_job); + + +-- +-- TOC entry 3881 (class 2606 OID 17095) +-- Name: job_stat c_job_stat_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_stat + ADD CONSTRAINT c_job_stat_pk PRIMARY KEY (pk_job_stat); + + +-- +-- TOC entry 3947 (class 2606 OID 17072) +-- Name: job c_job_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job + ADD CONSTRAINT c_job_uk UNIQUE (str_visible_name); + + +-- +-- TOC entry 3865 (class 2606 OID 17098) +-- Name: job_usage c_job_usage_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_usage + ADD CONSTRAINT c_job_usage_pk PRIMARY KEY (pk_job_usage); + + +-- +-- TOC entry 3867 (class 2606 OID 17099) +-- Name: job_usage c_job_usage_pk_job_uniq; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_usage + ADD CONSTRAINT c_job_usage_pk_job_uniq UNIQUE (pk_job); + + +-- +-- TOC entry 3920 (class 2606 OID 17083) +-- Name: layer_env c_layer_env_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_env + ADD CONSTRAINT c_layer_env_pk PRIMARY KEY (pk_layer_env); + + +-- +-- TOC entry 3843 (class 2606 OID 17103) +-- Name: layer_history c_layer_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_history + ADD CONSTRAINT c_layer_history_pk PRIMARY KEY (pk_layer); + + +-- +-- TOC entry 3816 (class 2606 OID 17110) +-- Name: layer_mem c_layer_mem_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_mem + ADD CONSTRAINT c_layer_mem_pk PRIMARY KEY (pk_layer_mem); + + +-- +-- TOC entry 3924 (class 2606 OID 17081) +-- Name: layer c_layer_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer + ADD CONSTRAINT c_layer_pk PRIMARY KEY (pk_layer); + + +-- +-- TOC entry 
3926 (class 2606 OID 17082) +-- Name: layer c_layer_str_name_unq; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer + ADD CONSTRAINT c_layer_str_name_unq UNIQUE (str_name, pk_job); + + +-- +-- TOC entry 3905 (class 2606 OID 17087) +-- Name: layer_usage c_layer_usage_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_usage + ADD CONSTRAINT c_layer_usage_pk PRIMARY KEY (pk_layer_usage); + + +-- +-- TOC entry 3907 (class 2606 OID 17088) +-- Name: layer_usage c_layer_usage_pk_layer_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_usage + ADD CONSTRAINT c_layer_usage_pk_layer_uk UNIQUE (pk_layer); + + +-- +-- TOC entry 3915 (class 2606 OID 17084) +-- Name: layer_resource c_layerresource_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_resource + ADD CONSTRAINT c_layerresource_pk PRIMARY KEY (pk_layer_resource); + + +-- +-- TOC entry 3917 (class 2606 OID 17085) +-- Name: layer_resource c_layerresource_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_resource + ADD CONSTRAINT c_layerresource_uk UNIQUE (pk_layer); + + +-- +-- TOC entry 3910 (class 2606 OID 17086) +-- Name: layer_stat c_layerstat_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_stat + ADD CONSTRAINT c_layerstat_pk PRIMARY KEY (pk_layer_stat); + + +-- +-- TOC entry 3902 (class 2606 OID 17089) +-- Name: matcher c_matcher_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.matcher + ADD CONSTRAINT c_matcher_pk PRIMARY KEY (pk_matcher); + + +-- +-- TOC entry 3778 (class 2606 OID 17078) +-- Name: deed c_pk_deed; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.deed + ADD CONSTRAINT c_pk_deed PRIMARY KEY (pk_deed); + + +-- +-- TOC entry 3786 (class 2606 OID 17076) +-- Name: host_local c_pk_host_local; Type: CONSTRAINT; 
Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host_local + ADD CONSTRAINT c_pk_host_local PRIMARY KEY (pk_host_local); + + +-- +-- TOC entry 3800 (class 2606 OID 17074) +-- Name: job_local c_pk_job_local; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_local + ADD CONSTRAINT c_pk_job_local PRIMARY KEY (pk_job_local); + + +-- +-- TOC entry 3768 (class 2606 OID 17080) +-- Name: layer_output c_pk_layer_output; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_output + ADD CONSTRAINT c_pk_layer_output PRIMARY KEY (pk_layer_output); + + +-- +-- TOC entry 3782 (class 2606 OID 17077) +-- Name: owner c_pk_owner; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.owner + ADD CONSTRAINT c_pk_owner PRIMARY KEY (pk_owner); + + +-- +-- TOC entry 4018 (class 2606 OID 17056) +-- Name: config c_pk_pkconfig; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.config + ADD CONSTRAINT c_pk_pkconfig PRIMARY KEY (pk_config); + + +-- +-- TOC entry 3795 (class 2606 OID 17075) +-- Name: service c_pk_service; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.service + ADD CONSTRAINT c_pk_service PRIMARY KEY (pk_service); + + +-- +-- TOC entry 3773 (class 2606 OID 17079) +-- Name: show_service c_pk_show_service; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.show_service + ADD CONSTRAINT c_pk_show_service PRIMARY KEY (pk_show_service); + + +-- +-- TOC entry 3809 (class 2606 OID 17111) +-- Name: point c_point_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.point + ADD CONSTRAINT c_point_pk PRIMARY KEY (pk_point); + + +-- +-- TOC entry 3811 (class 2606 OID 17112) +-- Name: point c_point_pk_show_dept; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.point + ADD CONSTRAINT c_point_pk_show_dept UNIQUE (pk_show, pk_dept); + + +-- +-- 
TOC entry 3893 (class 2606 OID 17090) +-- Name: proc c_proc_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.proc + ADD CONSTRAINT c_proc_pk PRIMARY KEY (pk_proc); + + +-- +-- TOC entry 3895 (class 2606 OID 17091) +-- Name: proc c_proc_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.proc + ADD CONSTRAINT c_proc_uk UNIQUE (pk_frame); + + +-- +-- TOC entry 4035 (class 2606 OID 17117) +-- Name: redirect c_redirect_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.redirect + ADD CONSTRAINT c_redirect_pk PRIMARY KEY (pk_proc); + + +-- +-- TOC entry 3832 (class 2606 OID 17107) +-- Name: show_alias c_show_alias_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.show_alias + ADD CONSTRAINT c_show_alias_pk PRIMARY KEY (pk_show_alias); + + +-- +-- TOC entry 3891 (class 2606 OID 17092) +-- Name: show c_show_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.show + ADD CONSTRAINT c_show_pk PRIMARY KEY (pk_show); + + +-- +-- TOC entry 4042 (class 2606 OID 17955) +-- Name: show_stats c_show_stats_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.show_stats + ADD CONSTRAINT c_show_stats_pk PRIMARY KEY (pk_show); + + +-- +-- TOC entry 4020 (class 2606 OID 17057) +-- Name: config c_show_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.config + ADD CONSTRAINT c_show_uk UNIQUE (str_key); + + +-- +-- TOC entry 3971 (class 2606 OID 17066) +-- Name: host c_str_host_fqdn_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host + ADD CONSTRAINT c_str_host_fqdn_uk UNIQUE (str_fqdn); + + +-- +-- TOC entry 3885 (class 2606 OID 17093) +-- Name: subscription c_subscription_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.subscription + ADD CONSTRAINT c_subscription_pk PRIMARY KEY (pk_subscription); + 
+ +-- +-- TOC entry 3887 (class 2606 OID 17094) +-- Name: subscription c_subscription_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.subscription + ADD CONSTRAINT c_subscription_uk UNIQUE (pk_show, pk_alloc); + + +-- +-- TOC entry 3859 (class 2606 OID 17101) +-- Name: task_lock c_task_lock_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.task_lock + ADD CONSTRAINT c_task_lock_pk PRIMARY KEY (pk_task_lock); + + +-- +-- TOC entry 3804 (class 2606 OID 17113) +-- Name: task c_task_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.task + ADD CONSTRAINT c_task_pk PRIMARY KEY (pk_task); + + +-- +-- TOC entry 3806 (class 2606 OID 17114) +-- Name: task c_task_uniq; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.task + ADD CONSTRAINT c_task_uniq UNIQUE (str_shot, pk_point); + + +-- +-- TOC entry 3753 (class 2606 OID 16392) +-- Name: flyway_schema_history flyway_schema_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.flyway_schema_history + ADD CONSTRAINT flyway_schema_history_pk PRIMARY KEY (installed_rank); + + +-- +-- TOC entry 3754 (class 1259 OID 16393) +-- Name: flyway_schema_history_s_idx; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX flyway_schema_history_s_idx ON public.flyway_schema_history USING btree (success); + + +-- +-- TOC entry 4032 (class 1259 OID 16858) +-- Name: i_action_pk_filter; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_action_pk_filter ON public.action USING btree (pk_filter); + + +-- +-- TOC entry 4033 (class 1259 OID 16859) +-- Name: i_action_pk_group; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_action_pk_group ON public.action USING btree (pk_folder); + + +-- +-- TOC entry 4029 (class 1259 OID 16861) +-- Name: i_alloc_pk_facility; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_alloc_pk_facility ON 
public.alloc USING btree (pk_facility); + + +-- +-- TOC entry 3948 (class 1259 OID 16909) +-- Name: i_booking_3; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_booking_3 ON public.job USING btree (str_state, b_paused, pk_show, pk_facility); + + +-- +-- TOC entry 4023 (class 1259 OID 16865) +-- Name: i_comment_pk_host; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_comment_pk_host ON public.comments USING btree (pk_host); + + +-- +-- TOC entry 4024 (class 1259 OID 16864) +-- Name: i_comment_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_comment_pk_job ON public.comments USING btree (pk_job); + + +-- +-- TOC entry 3779 (class 1259 OID 17030) +-- Name: i_deed_pk_host; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_deed_pk_host ON public.deed USING btree (pk_host); + + +-- +-- TOC entry 3780 (class 1259 OID 17031) +-- Name: i_deed_pk_owner; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_deed_pk_owner ON public.deed USING btree (pk_owner); + + +-- +-- TOC entry 4006 (class 1259 OID 16875) +-- Name: i_depend_b_composite; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_b_composite ON public.depend USING btree (b_composite); + + +-- +-- TOC entry 4007 (class 1259 OID 16874) +-- Name: i_depend_er_frame; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_er_frame ON public.depend USING btree (pk_frame_depend_er); + + +-- +-- TOC entry 4008 (class 1259 OID 16870) +-- Name: i_depend_er_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_er_layer ON public.depend USING btree (pk_layer_depend_er); + + +-- +-- TOC entry 4009 (class 1259 OID 16872) +-- Name: i_depend_on_frame; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_on_frame ON public.depend USING btree (pk_frame_depend_on); + + +-- +-- TOC entry 4010 (class 1259 OID 16869) +-- Name: i_depend_on_layer; Type: INDEX; Schema: 
public; Owner: cuebot +-- + +CREATE INDEX i_depend_on_layer ON public.depend USING btree (pk_layer_depend_on); + + +-- +-- TOC entry 4011 (class 1259 OID 16879) +-- Name: i_depend_pk_er_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_pk_er_job ON public.depend USING btree (pk_job_depend_er); + + +-- +-- TOC entry 4012 (class 1259 OID 16878) +-- Name: i_depend_pk_on_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_pk_on_job ON public.depend USING btree (pk_job_depend_on); + + +-- +-- TOC entry 4013 (class 1259 OID 16877) +-- Name: i_depend_pkparent; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_pkparent ON public.depend USING btree (pk_parent); + + +-- +-- TOC entry 4014 (class 1259 OID 16868) +-- Name: i_depend_signature; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_depend_signature ON public.depend USING btree (str_signature); + + +-- +-- TOC entry 4015 (class 1259 OID 16871) +-- Name: i_depend_str_target; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_str_target ON public.depend USING btree (str_target); + + +-- +-- TOC entry 4016 (class 1259 OID 16873) +-- Name: i_depend_str_type; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_depend_str_type ON public.depend USING btree (str_type); + + +-- +-- TOC entry 4003 (class 1259 OID 16881) +-- Name: i_filters_pk_show; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_filters_pk_show ON public.filter USING btree (pk_show); + + +-- +-- TOC entry 3998 (class 1259 OID 16883) +-- Name: i_folder_pkparentfolder; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_pkparentfolder ON public.folder USING btree (pk_parent_folder); + + +-- +-- TOC entry 3999 (class 1259 OID 16884) +-- Name: i_folder_pkshow; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_pkshow ON public.folder USING btree (pk_show); + + +-- +-- TOC entry 
3826 (class 1259 OID 16996) +-- Name: i_folder_res_int_max_cores; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_res_int_max_cores ON public.folder_resource USING btree (int_max_cores); + + +-- +-- TOC entry 3827 (class 1259 OID 17747) +-- Name: i_folder_res_int_max_gpus; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_res_int_max_gpus ON public.folder_resource USING btree (int_max_gpus); + + +-- +-- TOC entry 3828 (class 1259 OID 16997) +-- Name: i_folder_resource_fl_tier; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_resource_fl_tier ON public.folder_resource USING btree (float_tier); + + +-- +-- TOC entry 3829 (class 1259 OID 17961) +-- Name: i_folder_resource_int_cores; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_resource_int_cores ON public.folder_resource USING btree (int_cores); + + +-- +-- TOC entry 4000 (class 1259 OID 16885) +-- Name: i_folder_strname; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folder_strname ON public.folder USING btree (str_name); + + +-- +-- TOC entry 3830 (class 1259 OID 16998) +-- Name: i_folderresource_pkfolder; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_folderresource_pkfolder ON public.folder_resource USING btree (pk_folder); + + +-- +-- TOC entry 3985 (class 1259 OID 16890) +-- Name: i_frame_dispatch_idx; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_dispatch_idx ON public.frame USING btree (int_dispatch_order, int_layer_order); + + +-- +-- TOC entry 3757 (class 1259 OID 17040) +-- Name: i_frame_history_int_exit_stat; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_int_exit_stat ON public.frame_history USING btree (int_exit_status); + + +-- +-- TOC entry 3758 (class 1259 OID 17041) +-- Name: i_frame_history_int_ts_stopped; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_int_ts_stopped ON 
public.frame_history USING btree (int_ts_stopped); + + +-- +-- TOC entry 3759 (class 1259 OID 17042) +-- Name: i_frame_history_pk_alloc; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_pk_alloc ON public.frame_history USING btree (pk_alloc); + + +-- +-- TOC entry 3760 (class 1259 OID 17043) +-- Name: i_frame_history_pk_frame; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_pk_frame ON public.frame_history USING btree (pk_frame); + + +-- +-- TOC entry 3761 (class 1259 OID 17044) +-- Name: i_frame_history_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_pk_job ON public.frame_history USING btree (pk_job); + + +-- +-- TOC entry 3762 (class 1259 OID 17045) +-- Name: i_frame_history_pk_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_pk_layer ON public.frame_history USING btree (pk_layer); + + +-- +-- TOC entry 3763 (class 1259 OID 17046) +-- Name: i_frame_history_str_state; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_str_state ON public.frame_history USING btree (str_state); + + +-- +-- TOC entry 3764 (class 1259 OID 17039) +-- Name: i_frame_history_ts_start_stop; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_history_ts_start_stop ON public.frame_history USING btree (int_ts_started, int_ts_stopped); + + +-- +-- TOC entry 3986 (class 1259 OID 17883) +-- Name: i_frame_int_gpu_mem_reserved; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_int_gpu_mem_reserved ON public.frame USING btree (int_gpu_mem_reserved); + + +-- +-- TOC entry 3987 (class 1259 OID 16891) +-- Name: i_frame_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_pk_job ON public.frame USING btree (pk_job); + + +-- +-- TOC entry 3988 (class 1259 OID 16893) +-- Name: i_frame_pkjoblayer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_pkjoblayer 
ON public.frame USING btree (pk_layer); + + +-- +-- TOC entry 3989 (class 1259 OID 16889) +-- Name: i_frame_state_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_frame_state_job ON public.frame USING btree (str_state, pk_job); + + +-- +-- TOC entry 3972 (class 1259 OID 17847) +-- Name: i_host_int_gpu; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_int_gpu ON public.host USING btree (int_gpu_mem); + + +-- +-- TOC entry 3973 (class 1259 OID 17862) +-- Name: i_host_int_gpu_idle; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_int_gpu_idle ON public.host USING btree (int_gpu_mem_idle); + + +-- +-- TOC entry 3974 (class 1259 OID 17878) +-- Name: i_host_int_gpu_mem; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_int_gpu_mem ON public.host USING btree (int_gpu_mem); + + +-- +-- TOC entry 3975 (class 1259 OID 17879) +-- Name: i_host_int_gpu_mem_idle; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_int_gpu_mem_idle ON public.host USING btree (int_gpu_mem_idle); + + +-- +-- TOC entry 3976 (class 1259 OID 17880) +-- Name: i_host_int_gpus; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_int_gpus ON public.host USING btree (int_gpus); + + +-- +-- TOC entry 3977 (class 1259 OID 17881) +-- Name: i_host_int_gpus_idle; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_int_gpus_idle ON public.host USING btree (int_gpus_idle); + + +-- +-- TOC entry 3787 (class 1259 OID 17020) +-- Name: i_host_local; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_local ON public.host_local USING btree (pk_host); + + +-- +-- TOC entry 3788 (class 1259 OID 17702) +-- Name: i_host_local_int_gpu_idle; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_local_int_gpu_idle ON public.host_local USING btree (int_gpu_mem_idle); + + +-- +-- TOC entry 3789 (class 1259 OID 17713) +-- Name: i_host_local_int_gpu_max; Type: 
INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_local_int_gpu_max ON public.host_local USING btree (int_gpu_mem_max); + + +-- +-- TOC entry 3790 (class 1259 OID 17725) +-- Name: i_host_local_int_gpus_idle; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_local_int_gpus_idle ON public.host_local USING btree (int_gpus_idle); + + +-- +-- TOC entry 3791 (class 1259 OID 17726) +-- Name: i_host_local_int_gpus_max; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_local_int_gpus_max ON public.host_local USING btree (int_gpus_max); + + +-- +-- TOC entry 3792 (class 1259 OID 17022) +-- Name: i_host_local_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_local_pk_job ON public.host_local USING btree (pk_job); + + +-- +-- TOC entry 3793 (class 1259 OID 17023) +-- Name: i_host_local_unique; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_host_local_unique ON public.host_local USING btree (pk_host, pk_job); + + +-- +-- TOC entry 3978 (class 1259 OID 16899) +-- Name: i_host_pkalloc; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_pkalloc ON public.host USING btree (pk_alloc); + + +-- +-- TOC entry 3963 (class 1259 OID 17837) +-- Name: i_host_stat_int_gpu_mem_free; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_stat_int_gpu_mem_free ON public.host_stat USING btree (int_gpu_mem_free); + + +-- +-- TOC entry 3964 (class 1259 OID 17827) +-- Name: i_host_stat_int_gpu_mem_total; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_stat_int_gpu_mem_total ON public.host_stat USING btree (int_gpu_mem_total); + + +-- +-- TOC entry 3965 (class 1259 OID 16906) +-- Name: i_host_stat_str_os; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_stat_str_os ON public.host_stat USING btree (str_os); + + +-- +-- TOC entry 3862 (class 1259 OID 16972) +-- Name: i_host_str_tag_type; Type: INDEX; Schema: public; 
Owner: cuebot +-- + +CREATE INDEX i_host_str_tag_type ON public.host_tag USING btree (str_tag_type); + + +-- +-- TOC entry 3979 (class 1259 OID 16903) +-- Name: i_host_str_tags; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_str_tags ON public.host USING btree (str_tags); + + +-- +-- TOC entry 3980 (class 1259 OID 16900) +-- Name: i_host_strlockstate; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_strlockstate ON public.host USING btree (str_lock_state); + + +-- +-- TOC entry 3863 (class 1259 OID 16971) +-- Name: i_host_tag_pk_host; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_host_tag_pk_host ON public.host_tag USING btree (pk_host); + + +-- +-- TOC entry 3943 (class 1259 OID 16921) +-- Name: i_job_env_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_env_pk_job ON public.job_env USING btree (pk_job); + + +-- +-- TOC entry 3850 (class 1259 OID 16976) +-- Name: i_job_history_b_archived; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_b_archived ON public.job_history USING btree (b_archived); + + +-- +-- TOC entry 3851 (class 1259 OID 16981) +-- Name: i_job_history_pk_dept; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_pk_dept ON public.job_history USING btree (pk_dept); + + +-- +-- TOC entry 3852 (class 1259 OID 16982) +-- Name: i_job_history_pk_facility; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_pk_facility ON public.job_history USING btree (pk_facility); + + +-- +-- TOC entry 3853 (class 1259 OID 16975) +-- Name: i_job_history_pk_show; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_pk_show ON public.job_history USING btree (pk_show); + + +-- +-- TOC entry 3854 (class 1259 OID 16978) +-- Name: i_job_history_str_name; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_str_name ON public.job_history USING btree (str_name); + + 
+-- +-- TOC entry 3855 (class 1259 OID 16979) +-- Name: i_job_history_str_shot; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_str_shot ON public.job_history USING btree (str_shot); + + +-- +-- TOC entry 3856 (class 1259 OID 16980) +-- Name: i_job_history_str_user; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_str_user ON public.job_history USING btree (str_user); + + +-- +-- TOC entry 3857 (class 1259 OID 16977) +-- Name: i_job_history_ts_start_stop; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_history_ts_start_stop ON public.job_history USING btree (int_ts_started, int_ts_stopped); + + +-- +-- TOC entry 3949 (class 1259 OID 17960) +-- Name: i_job_int_min_cores; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_int_min_cores ON public.job USING btree (int_min_cores); + + +-- +-- TOC entry 3801 (class 1259 OID 17016) +-- Name: i_job_local_pk_host; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_job_local_pk_host ON public.job_local USING btree (pk_host); + + +-- +-- TOC entry 3802 (class 1259 OID 17015) +-- Name: i_job_local_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_job_local_pk_job ON public.job_local USING btree (pk_job); + + +-- +-- TOC entry 3822 (class 1259 OID 17821) +-- Name: i_job_mem_int_max_rss; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_mem_int_max_rss ON public.job_mem USING btree (int_max_rss); + + +-- +-- TOC entry 3823 (class 1259 OID 17000) +-- Name: i_job_mem_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_job_mem_pk_job ON public.job_mem USING btree (pk_job); + + +-- +-- TOC entry 3950 (class 1259 OID 16911) +-- Name: i_job_pk_dept; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_pk_dept ON public.job USING btree (pk_dept); + + +-- +-- TOC entry 3951 (class 1259 OID 16912) +-- Name: i_job_pk_facility; Type: 
INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_pk_facility ON public.job USING btree (pk_facility); + + +-- +-- TOC entry 3952 (class 1259 OID 16916) +-- Name: i_job_pkgroup; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_pkgroup ON public.job USING btree (pk_folder); + + +-- +-- TOC entry 3953 (class 1259 OID 16917) +-- Name: i_job_pkshow; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_pkshow ON public.job USING btree (pk_show); + + +-- +-- TOC entry 3840 (class 1259 OID 16990) +-- Name: i_job_post_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_post_pk_job ON public.job_post USING btree (pk_job); + + +-- +-- TOC entry 3841 (class 1259 OID 16988) +-- Name: i_job_post_pk_post_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_post_pk_post_job ON public.job_post USING btree (pk_post_job); + + +-- +-- TOC entry 3872 (class 1259 OID 16966) +-- Name: i_job_resource_cores; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_cores ON public.job_resource USING btree (int_cores); + + +-- +-- TOC entry 3873 (class 1259 OID 17766) +-- Name: i_job_resource_gpus; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_gpus ON public.job_resource USING btree (int_gpus); + + +-- +-- TOC entry 3874 (class 1259 OID 17765) +-- Name: i_job_resource_gpus_min_max; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_gpus_min_max ON public.job_resource USING btree (int_min_gpus, int_max_gpus); + + +-- +-- TOC entry 3875 (class 1259 OID 17959) +-- Name: i_job_resource_int_priority; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_int_priority ON public.job_resource USING btree (int_priority); + + +-- +-- TOC entry 3876 (class 1259 OID 16967) +-- Name: i_job_resource_max_c; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_max_c ON 
public.job_resource USING btree (int_max_cores); + + +-- +-- TOC entry 3877 (class 1259 OID 17767) +-- Name: i_job_resource_max_gpus; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_max_gpus ON public.job_resource USING btree (int_max_gpus); + + +-- +-- TOC entry 3878 (class 1259 OID 16962) +-- Name: i_job_resource_min_max; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_resource_min_max ON public.job_resource USING btree (int_min_cores, int_max_cores); + + +-- +-- TOC entry 3882 (class 1259 OID 16959) +-- Name: i_job_stat_int_waiting_count; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_stat_int_waiting_count ON public.job_stat USING btree (int_waiting_count); + + +-- +-- TOC entry 3883 (class 1259 OID 16960) +-- Name: i_job_stat_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_job_stat_pk_job ON public.job_stat USING btree (pk_job); + + +-- +-- TOC entry 3954 (class 1259 OID 16918) +-- Name: i_job_str_name; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_str_name ON public.job USING btree (str_name); + + +-- +-- TOC entry 3955 (class 1259 OID 16910) +-- Name: i_job_str_os; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_str_os ON public.job USING btree (str_os); + + +-- +-- TOC entry 3956 (class 1259 OID 16913) +-- Name: i_job_str_shot; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_str_shot ON public.job USING btree (str_shot); + + +-- +-- TOC entry 3957 (class 1259 OID 16919) +-- Name: i_job_str_state; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_str_state ON public.job USING btree (str_state); + + +-- +-- TOC entry 3879 (class 1259 OID 16963) +-- Name: i_job_tier; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_tier ON public.job_resource USING btree (float_tier); + + +-- +-- TOC entry 3958 (class 1259 OID 17962) +-- Name: i_job_ts_updated; Type: 
INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_job_ts_updated ON public.job USING btree (ts_updated); + + +-- +-- TOC entry 3927 (class 1259 OID 16922) +-- Name: i_layer_b_threadable; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_b_threadable ON public.layer USING btree (b_threadable); + + +-- +-- TOC entry 3928 (class 1259 OID 17818) +-- Name: i_layer_cores_gpus_mem; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_cores_gpus_mem ON public.layer USING btree (int_cores_min, int_gpus_min, int_mem_min, int_gpu_mem_min); + + +-- +-- TOC entry 3929 (class 1259 OID 17819) +-- Name: i_layer_cores_gpus_mem_thread; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_cores_gpus_mem_thread ON public.layer USING btree (int_cores_min, int_gpus_min, int_mem_min, int_gpu_mem_min, b_threadable); + + +-- +-- TOC entry 3930 (class 1259 OID 16923) +-- Name: i_layer_cores_mem; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_cores_mem ON public.layer USING btree (int_cores_min, int_mem_min); + + +-- +-- TOC entry 3931 (class 1259 OID 16924) +-- Name: i_layer_cores_mem_thread; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_cores_mem_thread ON public.layer USING btree (int_cores_min, int_mem_min, b_threadable); + + +-- +-- TOC entry 3921 (class 1259 OID 16933) +-- Name: i_layer_env_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_env_pk_job ON public.layer_env USING btree (pk_job); + + +-- +-- TOC entry 3922 (class 1259 OID 16934) +-- Name: i_layer_env_pk_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_env_pk_layer ON public.layer_env USING btree (pk_layer); + + +-- +-- TOC entry 3844 (class 1259 OID 16987) +-- Name: i_layer_history_b_archived; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_history_b_archived ON public.layer_history USING btree (b_archived); + + +-- +-- TOC entry 
3845 (class 1259 OID 16986) +-- Name: i_layer_history_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_history_pk_job ON public.layer_history USING btree (pk_job); + + +-- +-- TOC entry 3846 (class 1259 OID 16984) +-- Name: i_layer_history_str_name; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_history_str_name ON public.layer_history USING btree (str_name); + + +-- +-- TOC entry 3847 (class 1259 OID 16985) +-- Name: i_layer_history_str_type; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_history_str_type ON public.layer_history USING btree (str_type); + + +-- +-- TOC entry 3932 (class 1259 OID 17958) +-- Name: i_layer_int_cores_max; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_int_cores_max ON public.layer USING btree (int_cores_max); + + +-- +-- TOC entry 3933 (class 1259 OID 16926) +-- Name: i_layer_int_dispatch_order; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_int_dispatch_order ON public.layer USING btree (int_dispatch_order); + + +-- +-- TOC entry 3934 (class 1259 OID 17800) +-- Name: i_layer_int_gpu_mem_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_int_gpu_mem_min ON public.layer USING btree (int_gpu_mem_min); + + +-- +-- TOC entry 3935 (class 1259 OID 17957) +-- Name: i_layer_int_gpus_mem_min_1; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_int_gpus_mem_min_1 ON public.layer USING btree (int_gpu_mem_min); + + +-- +-- TOC entry 3936 (class 1259 OID 17956) +-- Name: i_layer_int_gpus_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_int_gpus_min ON public.layer USING btree (int_gpus_min); + + +-- +-- TOC entry 4039 (class 1259 OID 17942) +-- Name: i_layer_limit_pk_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_limit_pk_layer ON public.layer_limit USING btree (pk_layer); + + +-- +-- TOC entry 4040 (class 1259 OID 17943) 
+-- Name: i_layer_limit_pk_limit_record; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_limit_pk_limit_record ON public.layer_limit USING btree (pk_limit_record); + + +-- +-- TOC entry 3817 (class 1259 OID 17792) +-- Name: i_layer_mem_int_max_rss; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_mem_int_max_rss ON public.layer_mem USING btree (int_max_rss); + + +-- +-- TOC entry 3937 (class 1259 OID 16925) +-- Name: i_layer_mem_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_mem_min ON public.layer USING btree (int_mem_min); + + +-- +-- TOC entry 3818 (class 1259 OID 17004) +-- Name: i_layer_mem_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_mem_pk_job ON public.layer_mem USING btree (pk_job); + + +-- +-- TOC entry 3819 (class 1259 OID 17003) +-- Name: i_layer_mem_pk_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_layer_mem_pk_layer ON public.layer_mem USING btree (pk_layer); + + +-- +-- TOC entry 3769 (class 1259 OID 17037) +-- Name: i_layer_output_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_output_pk_job ON public.layer_output USING btree (pk_job); + + +-- +-- TOC entry 3770 (class 1259 OID 17036) +-- Name: i_layer_output_pk_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_output_pk_layer ON public.layer_output USING btree (pk_layer); + + +-- +-- TOC entry 3771 (class 1259 OID 17038) +-- Name: i_layer_output_unique; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_layer_output_unique ON public.layer_output USING btree (pk_layer, str_filespec); + + +-- +-- TOC entry 3938 (class 1259 OID 16928) +-- Name: i_layer_pkjob; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_pkjob ON public.layer USING btree (pk_job); + + +-- +-- TOC entry 3918 (class 1259 OID 16937) +-- Name: i_layer_resource_pk_job; Type: INDEX; Schema: public; 
Owner: cuebot +-- + +CREATE INDEX i_layer_resource_pk_job ON public.layer_resource USING btree (pk_job); + + +-- +-- TOC entry 3911 (class 1259 OID 16938) +-- Name: i_layer_stat_pk_layer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_layer_stat_pk_layer ON public.layer_stat USING btree (pk_layer); + + +-- +-- TOC entry 3939 (class 1259 OID 17963) +-- Name: i_layer_str_tags; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_str_tags ON public.layer USING btree (str_tags); + + +-- +-- TOC entry 3940 (class 1259 OID 16929) +-- Name: i_layer_strname; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_strname ON public.layer USING btree (str_name); + + +-- +-- TOC entry 3908 (class 1259 OID 16942) +-- Name: i_layer_usage_pk_job; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layer_usage_pk_job ON public.layer_usage USING btree (pk_job); + + +-- +-- TOC entry 3912 (class 1259 OID 16941) +-- Name: i_layerstat_int_waiting_count; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layerstat_int_waiting_count ON public.layer_stat USING btree (( +CASE + WHEN (int_waiting_count > 0) THEN 1 + ELSE NULL::integer +END), ( +CASE + WHEN (int_waiting_count > 0) THEN pk_layer + ELSE NULL::character varying +END)); + + +-- +-- TOC entry 3913 (class 1259 OID 16940) +-- Name: i_layerstat_pkjob; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_layerstat_pkjob ON public.layer_stat USING btree (pk_job); + + +-- +-- TOC entry 4038 (class 1259 OID 17944) +-- Name: i_limit_record_pk_limit_record; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_limit_record_pk_limit_record ON public.limit_record USING btree (pk_limit_record); + + +-- +-- TOC entry 3903 (class 1259 OID 16946) +-- Name: i_matcher_pk_filter; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_matcher_pk_filter ON public.matcher USING btree (pk_filter); + + +-- +-- TOC entry 3783 (class 
1259 OID 17027) +-- Name: i_owner_pk_show; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_owner_pk_show ON public.owner USING btree (pk_show); + + +-- +-- TOC entry 3784 (class 1259 OID 17028) +-- Name: i_owner_str_username; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_owner_str_username ON public.owner USING btree (str_username); + + +-- +-- TOC entry 3812 (class 1259 OID 17007) +-- Name: i_point_pk_dept; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_point_pk_dept ON public.point USING btree (pk_dept); + + +-- +-- TOC entry 3813 (class 1259 OID 17008) +-- Name: i_point_pk_show; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_point_pk_show ON public.point USING btree (pk_show); + + +-- +-- TOC entry 3814 (class 1259 OID 17010) +-- Name: i_point_tier; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_point_tier ON public.point USING btree (float_tier); + + +-- +-- TOC entry 3896 (class 1259 OID 17772) +-- Name: i_proc_int_gpu_mem_reserved; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_proc_int_gpu_mem_reserved ON public.proc USING btree (int_gpu_mem_reserved); + + +-- +-- TOC entry 3897 (class 1259 OID 16949) +-- Name: i_proc_pkhost; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_proc_pkhost ON public.proc USING btree (pk_host); + + +-- +-- TOC entry 3898 (class 1259 OID 16950) +-- Name: i_proc_pkjob; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_proc_pkjob ON public.proc USING btree (pk_job); + + +-- +-- TOC entry 3899 (class 1259 OID 16951) +-- Name: i_proc_pklayer; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_proc_pklayer ON public.proc USING btree (pk_layer); + + +-- +-- TOC entry 3900 (class 1259 OID 16952) +-- Name: i_proc_pkshow; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_proc_pkshow ON public.proc USING btree (pk_show); + + +-- +-- TOC entry 4036 (class 1259 OID 
17051) +-- Name: i_redirect_create; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_redirect_create ON public.redirect USING btree (lng_creation_time); + + +-- +-- TOC entry 4037 (class 1259 OID 17050) +-- Name: i_redirect_group; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_redirect_group ON public.redirect USING btree (str_group_id); + + +-- +-- TOC entry 3796 (class 1259 OID 17728) +-- Name: i_service_int_gpu_mem_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_service_int_gpu_mem_min ON public.service USING btree (int_gpu_mem_min); + + +-- +-- TOC entry 3797 (class 1259 OID 17737) +-- Name: i_service_int_gpus_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_service_int_gpus_min ON public.service USING btree (int_gpus_min); + + +-- +-- TOC entry 3798 (class 1259 OID 17018) +-- Name: i_service_str_name; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_service_str_name ON public.service USING btree (str_name); + + +-- +-- TOC entry 3833 (class 1259 OID 16993) +-- Name: i_show_alias_pk_show; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_show_alias_pk_show ON public.show_alias USING btree (pk_show); + + +-- +-- TOC entry 3774 (class 1259 OID 17671) +-- Name: i_show_service_int_gpu_mem_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_show_service_int_gpu_mem_min ON public.show_service USING btree (int_gpu_mem_min); + + +-- +-- TOC entry 3775 (class 1259 OID 17680) +-- Name: i_show_service_int_gpus_min; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_show_service_int_gpus_min ON public.show_service USING btree (int_gpus_min); + + +-- +-- TOC entry 3776 (class 1259 OID 17033) +-- Name: i_show_service_str_name; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE UNIQUE INDEX i_show_service_str_name ON public.show_service USING btree (str_name, pk_show); + + +-- +-- TOC entry 3888 (class 1259 OID 16955) +-- 
Name: i_sub_tier; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_sub_tier ON public.subscription USING btree (float_tier); + + +-- +-- TOC entry 3889 (class 1259 OID 16958) +-- Name: i_subscription_pkalloc; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_subscription_pkalloc ON public.subscription USING btree (pk_alloc); + + +-- +-- TOC entry 3807 (class 1259 OID 17012) +-- Name: i_task_pk_point; Type: INDEX; Schema: public; Owner: cuebot +-- + +CREATE INDEX i_task_pk_point ON public.task USING btree (pk_point); + + +-- +-- TOC entry 4138 (class 2620 OID 17513) +-- Name: folder after_insert_folder; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER after_insert_folder AFTER INSERT ON public.folder FOR EACH ROW EXECUTE FUNCTION public.trigger__after_insert_folder(); + + +-- +-- TOC entry 4125 (class 2620 OID 17485) +-- Name: job after_insert_job; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER after_insert_job AFTER INSERT ON public.job FOR EACH ROW EXECUTE FUNCTION public.trigger__after_insert_job(); + + +-- +-- TOC entry 4122 (class 2620 OID 17493) +-- Name: layer after_insert_layer; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER after_insert_layer AFTER INSERT ON public.layer FOR EACH ROW EXECUTE FUNCTION public.trigger__after_insert_layer(); + + +-- +-- TOC entry 4126 (class 2620 OID 17487) +-- Name: job after_job_dept_update; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER after_job_dept_update AFTER UPDATE ON public.job FOR EACH ROW WHEN ((((new.pk_dept)::text <> (old.pk_dept)::text) AND ((new.str_state)::text = 'PENDING'::text))) EXECUTE FUNCTION public.trigger__after_job_dept_update(); + + +-- +-- TOC entry 4127 (class 2620 OID 17483) +-- Name: job after_job_finished; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER after_job_finished AFTER UPDATE ON public.job FOR EACH ROW WHEN ((((old.str_state)::text = 'PENDING'::text) AND 
((new.str_state)::text = 'FINISHED'::text))) EXECUTE FUNCTION public.trigger__after_job_finished(); + + +-- +-- TOC entry 4128 (class 2620 OID 17479) +-- Name: job after_job_moved; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER after_job_moved AFTER UPDATE ON public.job FOR EACH ROW WHEN (((new.pk_folder)::text <> (old.pk_folder)::text)) EXECUTE FUNCTION public.trigger__after_job_moved(); + + +-- +-- TOC entry 4139 (class 2620 OID 17511) +-- Name: folder before_delete_folder; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_delete_folder BEFORE DELETE ON public.folder FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_folder(); + + +-- +-- TOC entry 4130 (class 2620 OID 17501) +-- Name: host before_delete_host; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_delete_host BEFORE DELETE ON public.host FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_host(); + + +-- +-- TOC entry 4129 (class 2620 OID 17481) +-- Name: job before_delete_job; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_delete_job BEFORE DELETE ON public.job FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_job(); + + +-- +-- TOC entry 4123 (class 2620 OID 17495) +-- Name: layer before_delete_layer; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_delete_layer BEFORE DELETE ON public.layer FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_layer(); + + +-- +-- TOC entry 4124 (class 2620 OID 17550) +-- Name: layer before_delete_layer_drop_limit; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_delete_layer_drop_limit BEFORE DELETE ON public.layer FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_layer_drop_limit(); + + +-- +-- TOC entry 4140 (class 2620 OID 17515) +-- Name: folder before_insert_folder; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_insert_folder BEFORE INSERT ON 
public.folder FOR EACH ROW EXECUTE FUNCTION public.trigger__before_insert_folder(); + + +-- +-- TOC entry 4119 (class 2620 OID 17517) +-- Name: proc before_insert_proc; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER before_insert_proc BEFORE INSERT ON public.proc FOR EACH ROW EXECUTE FUNCTION public.trigger__before_insert_proc(); + + +-- +-- TOC entry 4132 (class 2620 OID 17529) +-- Name: frame frame_history_open; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER frame_history_open AFTER UPDATE ON public.frame FOR EACH ROW WHEN (((new.str_state)::text <> (old.str_state)::text)) EXECUTE FUNCTION public.trigger__frame_history_open(); + + +-- +-- TOC entry 4111 (class 2620 OID 17539) +-- Name: point point_tier; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER point_tier BEFORE UPDATE ON public.point FOR EACH ROW EXECUTE FUNCTION public.trigger__point_tier(); + + +-- +-- TOC entry 4107 (class 2620 OID 17541) +-- Name: frame_history tbiu_frame_history; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tbiu_frame_history BEFORE INSERT OR UPDATE ON public.frame_history FOR EACH ROW EXECUTE FUNCTION public.trigger__tbiu_frame_history(); + + +-- +-- TOC entry 4114 (class 2620 OID 17497) +-- Name: job_history tbiu_job_history; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tbiu_job_history BEFORE INSERT OR UPDATE ON public.job_history FOR EACH ROW EXECUTE FUNCTION public.trigger__tbiu_job_history(); + + +-- +-- TOC entry 4113 (class 2620 OID 17477) +-- Name: layer_history tbiu_layer_history; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tbiu_layer_history BEFORE INSERT OR UPDATE ON public.layer_history FOR EACH ROW EXECUTE FUNCTION public.trigger__tbiu_layer_history(); + + +-- +-- TOC entry 4112 (class 2620 OID 17509) +-- Name: folder_resource tier_folder; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tier_folder BEFORE UPDATE ON 
public.folder_resource FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_folder(); + + +-- +-- TOC entry 4108 (class 2620 OID 17491) +-- Name: host_local tier_host_local; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tier_host_local BEFORE UPDATE ON public.host_local FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_host_local(); + + +-- +-- TOC entry 4115 (class 2620 OID 17505) +-- Name: job_resource tier_job; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tier_job BEFORE UPDATE ON public.job_resource FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_job(); + + +-- +-- TOC entry 4117 (class 2620 OID 17537) +-- Name: subscription tier_subscription; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER tier_subscription BEFORE UPDATE ON public.subscription FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_subscription(); + + +-- +-- TOC entry 4133 (class 2620 OID 17531) +-- Name: frame update_frame_checkpoint_state; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER update_frame_checkpoint_state BEFORE UPDATE ON public.frame FOR EACH ROW WHEN ((((new.str_state)::text = 'WAITING'::text) AND ((old.str_state)::text = 'RUNNING'::text) AND ((new.str_checkpoint_state)::text = ANY ((ARRAY['ENABLED'::character varying, 'COPYING'::character varying])::text[])))) EXECUTE FUNCTION public.trigger__update_frame_checkpoint_state(); + + +-- +-- TOC entry 4134 (class 2620 OID 17527) +-- Name: frame update_frame_dep_to_wait; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER update_frame_dep_to_wait BEFORE UPDATE ON public.frame FOR EACH ROW WHEN (((old.int_depend_count > 0) AND (new.int_depend_count < 1) AND ((old.str_state)::text = 'DEPEND'::text))) EXECUTE FUNCTION public.trigger__update_frame_dep_to_wait(); + + +-- +-- TOC entry 4135 (class 2620 OID 17525) +-- Name: frame update_frame_eaten; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER update_frame_eaten BEFORE UPDATE 
ON public.frame FOR EACH ROW WHEN ((((new.str_state)::text = 'EATEN'::text) AND ((old.str_state)::text = 'SUCCEEDED'::text))) EXECUTE FUNCTION public.trigger__update_frame_eaten(); + + +-- +-- TOC entry 4136 (class 2620 OID 17533) +-- Name: frame update_frame_status_counts; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER update_frame_status_counts AFTER UPDATE ON public.frame FOR EACH ROW WHEN ((((old.str_state)::text <> 'SETUP'::text) AND ((old.str_state)::text <> (new.str_state)::text))) EXECUTE FUNCTION public.trigger__update_frame_status_counts(); + + +-- +-- TOC entry 4137 (class 2620 OID 17523) +-- Name: frame update_frame_wait_to_dep; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER update_frame_wait_to_dep BEFORE UPDATE ON public.frame FOR EACH ROW WHEN (((new.int_depend_count > 0) AND ((new.str_state)::text = ANY ((ARRAY['DEAD'::character varying, 'SUCCEEDED'::character varying, 'WAITING'::character varying, 'CHECKPOINT'::character varying])::text[])))) EXECUTE FUNCTION public.trigger__update_frame_wait_to_dep(); + + +-- +-- TOC entry 4120 (class 2620 OID 17519) +-- Name: proc update_proc_update_layer; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER update_proc_update_layer AFTER UPDATE ON public.proc FOR EACH ROW WHEN (((new.pk_layer)::text <> (old.pk_layer)::text)) EXECUTE FUNCTION public.trigger__update_proc_update_layer(); + + +-- +-- TOC entry 4121 (class 2620 OID 17521) +-- Name: proc upgrade_proc_memory_usage; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER upgrade_proc_memory_usage AFTER UPDATE ON public.proc FOR EACH ROW WHEN ((new.int_mem_reserved <> old.int_mem_reserved)) EXECUTE FUNCTION public.trigger__upgrade_proc_memory_usage(); + + +-- +-- TOC entry 4109 (class 2620 OID 17939) +-- Name: host_local verify_host_local; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER verify_host_local BEFORE UPDATE ON public.host_local FOR EACH ROW WHEN 
(((new.int_cores_max = old.int_cores_max) AND (new.int_mem_max = old.int_mem_max) AND ((new.int_cores_idle <> old.int_cores_idle) OR (new.int_mem_idle <> old.int_mem_idle)) AND ((new.int_gpus_max = old.int_gpus_max) AND (new.int_gpu_mem_max = old.int_gpu_mem_max)) AND ((new.int_gpus_idle <> old.int_gpus_idle) OR (new.int_gpu_mem_idle <> old.int_gpu_mem_idle)))) EXECUTE FUNCTION public.trigger__verify_host_local(); + + +-- +-- TOC entry 4131 (class 2620 OID 17940) +-- Name: host verify_host_resources; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER verify_host_resources BEFORE UPDATE ON public.host FOR EACH ROW WHEN (((new.int_cores_idle <> old.int_cores_idle) OR (new.int_mem_idle <> old.int_mem_idle) OR (new.int_gpus_idle <> old.int_gpus_idle) OR (new.int_gpu_mem_idle <> old.int_gpu_mem_idle))) EXECUTE FUNCTION public.trigger__verify_host_resources(); + + +-- +-- TOC entry 4110 (class 2620 OID 17507) +-- Name: job_local verify_job_local; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER verify_job_local BEFORE UPDATE ON public.job_local FOR EACH ROW WHEN (((new.int_max_cores = old.int_max_cores) AND (new.int_cores > old.int_cores))) EXECUTE FUNCTION public.trigger__verify_job_local(); + + +-- +-- TOC entry 4116 (class 2620 OID 17941) +-- Name: job_resource verify_job_resources; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER verify_job_resources BEFORE UPDATE ON public.job_resource FOR EACH ROW WHEN ((((new.int_max_cores = old.int_max_cores) AND (new.int_cores > old.int_cores)) OR ((new.int_max_gpus = old.int_max_gpus) AND (new.int_gpus > old.int_gpus)))) EXECUTE FUNCTION public.trigger__verify_job_resources(); + + +-- +-- TOC entry 4118 (class 2620 OID 17535) +-- Name: subscription verify_subscription; Type: TRIGGER; Schema: public; Owner: cuebot +-- + +CREATE TRIGGER verify_subscription BEFORE UPDATE ON public.subscription FOR EACH ROW WHEN (((new.int_burst = old.int_burst) AND (new.int_cores > 
old.int_cores))) EXECUTE FUNCTION public.trigger__verify_subscription(); + + +-- +-- TOC entry 4104 (class 2606 OID 17165) +-- Name: action c_action_pk_filter; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.action + ADD CONSTRAINT c_action_pk_filter FOREIGN KEY (pk_filter) REFERENCES public.filter(pk_filter); + + +-- +-- TOC entry 4105 (class 2606 OID 17170) +-- Name: action c_action_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.action + ADD CONSTRAINT c_action_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); + + +-- +-- TOC entry 4103 (class 2606 OID 17175) +-- Name: alloc c_alloc_pk_facility; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.alloc + ADD CONSTRAINT c_alloc_pk_facility FOREIGN KEY (pk_facility) REFERENCES public.facility(pk_facility); + + +-- +-- TOC entry 4101 (class 2606 OID 17180) +-- Name: comments c_comment_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.comments + ADD CONSTRAINT c_comment_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); + + +-- +-- TOC entry 4102 (class 2606 OID 17185) +-- Name: comments c_comment_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.comments + ADD CONSTRAINT c_comment_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4051 (class 2606 OID 17435) +-- Name: deed c_deed_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.deed + ADD CONSTRAINT c_deed_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); + + +-- +-- TOC entry 4100 (class 2606 OID 17190) +-- Name: filter c_filter_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.filter + ADD CONSTRAINT c_filter_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4097 (class 2606 OID 17205) 
+-- Name: folder_level c_folder_level_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder_level + ADD CONSTRAINT c_folder_level_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); + + +-- +-- TOC entry 4098 (class 2606 OID 17200) +-- Name: folder c_folder_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder + ADD CONSTRAINT c_folder_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); + + +-- +-- TOC entry 4099 (class 2606 OID 17195) +-- Name: folder c_folder_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder + ADD CONSTRAINT c_folder_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4063 (class 2606 OID 17375) +-- Name: folder_resource c_folder_resource_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.folder_resource + ADD CONSTRAINT c_folder_resource_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); + + +-- +-- TOC entry 4045 (class 2606 OID 17465) +-- Name: frame_history c_frame_history_pk_alloc; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame_history + ADD CONSTRAINT c_frame_history_pk_alloc FOREIGN KEY (pk_alloc) REFERENCES public.alloc(pk_alloc); + + +-- +-- TOC entry 4046 (class 2606 OID 17455) +-- Name: frame_history c_frame_history_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame_history + ADD CONSTRAINT c_frame_history_pk_job FOREIGN KEY (pk_job) REFERENCES public.job_history(pk_job) ON DELETE CASCADE; + + +-- +-- TOC entry 4047 (class 2606 OID 17460) +-- Name: frame_history c_frame_history_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame_history + ADD CONSTRAINT c_frame_history_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer_history(pk_layer) ON 
DELETE CASCADE; + + +-- +-- TOC entry 4095 (class 2606 OID 17210) +-- Name: frame c_frame_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame + ADD CONSTRAINT c_frame_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4096 (class 2606 OID 17215) +-- Name: frame c_frame_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame + ADD CONSTRAINT c_frame_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4106 (class 2606 OID 17969) +-- Name: frame_state_display_overrides c_frame_state_overrides_pk_frame; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.frame_state_display_overrides + ADD CONSTRAINT c_frame_state_overrides_pk_frame FOREIGN KEY (pk_frame) REFERENCES public.frame(pk_frame); + + +-- +-- TOC entry 4053 (class 2606 OID 17425) +-- Name: host_local c_host_local_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host_local + ADD CONSTRAINT c_host_local_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); + + +-- +-- TOC entry 4054 (class 2606 OID 17420) +-- Name: host_local c_host_local_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host_local + ADD CONSTRAINT c_host_local_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4094 (class 2606 OID 17220) +-- Name: host c_host_pk_alloc; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host + ADD CONSTRAINT c_host_pk_alloc FOREIGN KEY (pk_alloc) REFERENCES public.alloc(pk_alloc); + + +-- +-- TOC entry 4093 (class 2606 OID 17225) +-- Name: host_stat c_host_stat_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.host_stat + ADD CONSTRAINT c_host_stat_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); + + +-- +-- TOC entry 4088 (class 
2606 OID 17250) +-- Name: job_env c_job_env_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_env + ADD CONSTRAINT c_job_env_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4068 (class 2606 OID 17345) +-- Name: job_history c_job_history_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_history + ADD CONSTRAINT c_job_history_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); + + +-- +-- TOC entry 4069 (class 2606 OID 17340) +-- Name: job_history c_job_history_pk_facility; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_history + ADD CONSTRAINT c_job_history_pk_facility FOREIGN KEY (pk_facility) REFERENCES public.facility(pk_facility); + + +-- +-- TOC entry 4070 (class 2606 OID 17350) +-- Name: job_history c_job_history_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_history + ADD CONSTRAINT c_job_history_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4055 (class 2606 OID 17415) +-- Name: job_local c_job_local_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_local + ADD CONSTRAINT c_job_local_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); + + +-- +-- TOC entry 4056 (class 2606 OID 17410) +-- Name: job_local c_job_local_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_local + ADD CONSTRAINT c_job_local_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4062 (class 2606 OID 17380) +-- Name: job_mem c_job_mem_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_mem + ADD CONSTRAINT c_job_mem_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4089 (class 2606 OID 17245) +-- Name: job c_job_pk_dept; Type: FK 
CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job + ADD CONSTRAINT c_job_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); + + +-- +-- TOC entry 4090 (class 2606 OID 17240) +-- Name: job c_job_pk_facility; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job + ADD CONSTRAINT c_job_pk_facility FOREIGN KEY (pk_facility) REFERENCES public.facility(pk_facility); + + +-- +-- TOC entry 4091 (class 2606 OID 17235) +-- Name: job c_job_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job + ADD CONSTRAINT c_job_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); + + +-- +-- TOC entry 4092 (class 2606 OID 17230) +-- Name: job c_job_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job + ADD CONSTRAINT c_job_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4065 (class 2606 OID 17360) +-- Name: job_post c_job_post_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_post + ADD CONSTRAINT c_job_post_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4066 (class 2606 OID 17365) +-- Name: job_post c_job_post_pk_post_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_post + ADD CONSTRAINT c_job_post_pk_post_job FOREIGN KEY (pk_post_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4072 (class 2606 OID 17330) +-- Name: job_resource c_job_resource_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_resource + ADD CONSTRAINT c_job_resource_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4073 (class 2606 OID 17325) +-- Name: job_stat c_job_stat_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_stat + ADD CONSTRAINT c_job_stat_pk_job FOREIGN 
KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4071 (class 2606 OID 17335) +-- Name: job_usage c_job_usage_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.job_usage + ADD CONSTRAINT c_job_usage_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4085 (class 2606 OID 17265) +-- Name: layer_env c_layer_env_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_env + ADD CONSTRAINT c_layer_env_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4086 (class 2606 OID 17260) +-- Name: layer_env c_layer_env_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_env + ADD CONSTRAINT c_layer_env_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4067 (class 2606 OID 17355) +-- Name: layer_history c_layer_history_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_history + ADD CONSTRAINT c_layer_history_pk_job FOREIGN KEY (pk_job) REFERENCES public.job_history(pk_job) ON DELETE CASCADE; + + +-- +-- TOC entry 4060 (class 2606 OID 17385) +-- Name: layer_mem c_layer_mem_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_mem + ADD CONSTRAINT c_layer_mem_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4061 (class 2606 OID 17390) +-- Name: layer_mem c_layer_mem_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_mem + ADD CONSTRAINT c_layer_mem_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4048 (class 2606 OID 17450) +-- Name: layer_output c_layer_output_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_output + ADD CONSTRAINT c_layer_output_pk_job FOREIGN KEY (pk_job) REFERENCES 
public.job(pk_job); + + +-- +-- TOC entry 4049 (class 2606 OID 17445) +-- Name: layer_output c_layer_output_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_output + ADD CONSTRAINT c_layer_output_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4087 (class 2606 OID 17255) +-- Name: layer c_layer_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer + ADD CONSTRAINT c_layer_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4083 (class 2606 OID 17270) +-- Name: layer_resource c_layer_resource_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_resource + ADD CONSTRAINT c_layer_resource_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4084 (class 2606 OID 17275) +-- Name: layer_resource c_layer_resource_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_resource + ADD CONSTRAINT c_layer_resource_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4081 (class 2606 OID 17280) +-- Name: layer_stat c_layer_stat_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_stat + ADD CONSTRAINT c_layer_stat_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- +-- TOC entry 4082 (class 2606 OID 17285) +-- Name: layer_stat c_layer_stat_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_stat + ADD CONSTRAINT c_layer_stat_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4079 (class 2606 OID 17290) +-- Name: layer_usage c_layer_usage_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_usage + ADD CONSTRAINT c_layer_usage_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); + + +-- 
+-- TOC entry 4080 (class 2606 OID 17295) +-- Name: layer_usage c_layer_usage_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.layer_usage + ADD CONSTRAINT c_layer_usage_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); + + +-- +-- TOC entry 4078 (class 2606 OID 17300) +-- Name: matcher c_matcher_pk_filter; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.matcher + ADD CONSTRAINT c_matcher_pk_filter FOREIGN KEY (pk_filter) REFERENCES public.filter(pk_filter); + + +-- +-- TOC entry 4052 (class 2606 OID 17430) +-- Name: owner c_owner_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.owner + ADD CONSTRAINT c_owner_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4058 (class 2606 OID 17395) +-- Name: point c_point_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.point + ADD CONSTRAINT c_point_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); + + +-- +-- TOC entry 4059 (class 2606 OID 17400) +-- Name: point c_point_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.point + ADD CONSTRAINT c_point_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4076 (class 2606 OID 17305) +-- Name: proc c_proc_pk_frame; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.proc + ADD CONSTRAINT c_proc_pk_frame FOREIGN KEY (pk_frame) REFERENCES public.frame(pk_frame); + + +-- +-- TOC entry 4077 (class 2606 OID 17310) +-- Name: proc c_proc_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.proc + ADD CONSTRAINT c_proc_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); + + +-- +-- TOC entry 4064 (class 2606 OID 17370) +-- Name: show_alias c_show_alias_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + 
+ALTER TABLE ONLY public.show_alias + ADD CONSTRAINT c_show_alias_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4050 (class 2606 OID 17440) +-- Name: show_service c_show_service_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.show_service + ADD CONSTRAINT c_show_service_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4074 (class 2606 OID 17315) +-- Name: subscription c_subscription_pk_alloc; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.subscription + ADD CONSTRAINT c_subscription_pk_alloc FOREIGN KEY (pk_alloc) REFERENCES public.alloc(pk_alloc); + + +-- +-- TOC entry 4075 (class 2606 OID 17320) +-- Name: subscription c_subscription_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.subscription + ADD CONSTRAINT c_subscription_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); + + +-- +-- TOC entry 4057 (class 2606 OID 17405) +-- Name: task c_task_pk_point; Type: FK CONSTRAINT; Schema: public; Owner: cuebot +-- + +ALTER TABLE ONLY public.task + ADD CONSTRAINT c_task_pk_point FOREIGN KEY (pk_point) REFERENCES public.point(pk_point); + + +-- Completed on 2025-09-04 18:51:09 UTC + +-- +-- PostgreSQL database dump complete +-- diff --git a/rust/crates/scheduler/src/allocation.rs b/rust/crates/scheduler/src/allocation.rs new file mode 100644 index 000000000..daf7a86f8 --- /dev/null +++ b/rust/crates/scheduler/src/allocation.rs @@ -0,0 +1,83 @@ +use std::{ + collections::HashMap, + sync::{Arc, RwLock}, +}; + +use miette::Result; +use tokio::{sync::OnceCell, time}; +use uuid::Uuid; + +use crate::{ + config::CONFIG, dao::AllocationDao, dao::AllocationName, dao::ShowId, models::Subscription, +}; + +pub struct AllocationService { + cache: Arc>>>, + allocation_dao: Arc, +} + +static ALLOCATION_SERVICE: OnceCell> = OnceCell::const_new(); + +pub async fn allocation_service() 
-> Result> { + ALLOCATION_SERVICE + .get_or_try_init(|| async { + let service = AllocationService::init().await?; + + service.start_async_loop(); + + Ok(Arc::new(service)) + }) + .await + .cloned() +} + +impl AllocationService { + async fn init() -> Result { + let allocation_dao = Arc::new(AllocationDao::new().await?); + let cache = Arc::new(RwLock::new(HashMap::new())); + let service = AllocationService { + cache: cache.clone(), + allocation_dao: allocation_dao.clone(), + }; + + // Fetch data at init to avoid having to wait for a loop iteration to fill up the cache + let subs = allocation_dao.get_subscriptions_by_show().await?; + let mut lock = cache.write().unwrap_or_else(|poison| poison.into_inner()); + *lock = subs; + + Ok(service) + } + + fn start_async_loop(&self) { + let cache = self.cache.clone(); + let allocation_dao = self.allocation_dao.clone(); + + tokio::spawn(async move { + let mut interval = time::interval(CONFIG.queue.allocation_refresh_interval); + + loop { + interval.tick().await; + + let subs = allocation_dao + .get_subscriptions_by_show() + .await + .expect("Failed to fetch list of subscriptions."); + let mut lock = cache.write().unwrap_or_else(|poison| poison.into_inner()); + *lock = subs; + } + }); + } + + pub fn get_subscription( + &self, + allocation_name: &String, + show_id: &Uuid, + ) -> Option { + self.cache + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .get(show_id)? 
+ .get(allocation_name) + .cloned() + } +} diff --git a/rust/crates/scheduler/src/cluster.rs b/rust/crates/scheduler/src/cluster.rs new file mode 100644 index 000000000..e3e108d80 --- /dev/null +++ b/rust/crates/scheduler/src/cluster.rs @@ -0,0 +1,412 @@ +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, Mutex, RwLock, + }, + time::{Duration, SystemTime}, +}; + +use futures::StreamExt; +use itertools::Itertools; +use miette::{IntoDiagnostic, Result}; +use serde::{Deserialize, Serialize}; +use tokio::sync::mpsc; +use tracing::{debug, error, warn}; +use uuid::Uuid; + +use crate::{ + cluster_key::{ClusterKey, Tag, TagType}, + config::CONFIG, + dao::{helpers::parse_uuid, ClusterDao}, +}; + +pub static CLUSTER_ROUNDS: AtomicUsize = AtomicUsize::new(0); + +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq)] +pub enum Cluster { + ComposedKey(ClusterKey), + /// facility_id: Uuid, + /// tags: Vec + TagsKey(Uuid, Vec), +} + +#[derive(Debug)] +pub struct ClusterFeed { + pub clusters: Arc>>, + current_index: Arc, + stop_flag: Arc, + sleep_map: Arc>>, +} + +/// Control messages for the cluster feed stream. +/// +/// These messages are sent to the control channel returned by `ClusterFeed::stream()` +/// to influence feed behavior during runtime. +pub enum FeedMessage { + /// Stops the cluster feed stream gracefully. + Stop(), + /// Puts a specific cluster to sleep for the given duration. + /// + /// # Fields + /// + /// * `Cluster` - The cluster to put to sleep + /// * `Duration` - How long to sleep before the cluster can be processed again + Sleep(Cluster, Duration), +} + +impl Cluster { + /// Returns an iterator over the tags associated with this cluster. 
+ /// + /// # Returns + /// + /// * `Box>` - Iterator over tag references + pub fn tags(&self) -> Box + '_> { + match self { + Cluster::ComposedKey(cluster_key) => Box::new(std::iter::once(&cluster_key.tag)), + Cluster::TagsKey(_facility_id, tags) => Box::new(tags.iter()), + } + } +} + +impl ClusterFeed { + /// Loads all clusters from the database and organizes them by tag type. + /// + /// Loads allocation clusters (one per facility+show+tag), and chunks manual/hostname tags + /// into groups based on configured chunk sizes. In a distributed system, this should be + /// scheduled and coordinated across nodes. + /// + /// # Arguments + /// + /// * `facility_id` - Optional facility ID to filter clusters + /// * `ignore_tags` - List of tag names to ignore when loading clusters + /// + /// # Returns + /// + /// * `Ok(ClusterFeed)` - Successfully loaded cluster feed + /// * `Err(miette::Error)` - Failed to load clusters from database + pub async fn load_all(facility_id: &Option, ignore_tags: &[String]) -> Result { + let cluster_dao = ClusterDao::new().await?; + + // Fetch clusters for both facilitys+shows+tags and just tags + let mut clusters_stream = cluster_dao + .fetch_alloc_clusters() + .chain(cluster_dao.fetch_non_alloc_clusters()); + let mut clusters = Vec::new(); + let mut manual_tags: HashMap> = HashMap::new(); + let mut hostname_tags: HashMap> = HashMap::new(); + + // Collect all tags + while let Some(record) = clusters_stream.next().await { + match record { + Ok(cluster) => { + // Skip tags that are in the ignore list + if ignore_tags.contains(&cluster.tag) { + continue; + } + + match cluster.ttype.as_str() { + // Each alloc tag becomes its own cluster + "ALLOC" => { + let cluster_facility_id = parse_uuid(&cluster.facility_id); + if facility_id + .as_ref() + .map(|fid| fid == &cluster_facility_id) + .unwrap_or(true) + { + clusters.push(Cluster::ComposedKey(ClusterKey { + facility_id: cluster_facility_id, + show_id: parse_uuid(&cluster.show_id), + tag: Tag { + 
name: cluster.tag, + ttype: TagType::Alloc, + }, + })); + } + } + // Manual and hostname tags are collected to be chunked + "MANUAL" => { + manual_tags + .entry(parse_uuid(&cluster.facility_id)) + .or_default() + .push(cluster.tag); + } + "HOSTNAME" => { + hostname_tags + .entry(parse_uuid(&cluster.facility_id)) + .or_default() + .push(cluster.tag); + } + _ => (), + }; + } + Err(err) => error!("Failed to fetch clusters. {err}"), + } + } + + // Chunk Manual tags + for (facility_id, tags) in manual_tags.into_iter() { + for chunk in &tags.into_iter().chunks(CONFIG.queue.manual_tags_chunk_size) { + clusters.push(Cluster::TagsKey( + facility_id, + chunk + .map(|name| Tag { + name, + ttype: TagType::Manual, + }) + .collect(), + )) + } + } + + // Chunk Hostname tags + for (facility_id, tags) in hostname_tags.into_iter() { + for chunk in &tags + .into_iter() + .chunks(CONFIG.queue.hostname_tags_chunk_size) + { + clusters.push(Cluster::TagsKey( + facility_id, + chunk + .map(|name| Tag { + name, + ttype: TagType::HostName, + }) + .collect(), + )) + } + } + + Ok(ClusterFeed { + clusters: Arc::new(RwLock::new(clusters)), + current_index: Arc::new(AtomicUsize::new(0)), + stop_flag: Arc::new(AtomicBool::new(false)), + sleep_map: Arc::new(Mutex::new(HashMap::new())), + }) + } + + /// Creates a ClusterFeed from a predefined list of clusters for testing. 
+ /// + /// # Arguments + /// + /// * `clusters` - List of clusters to iterate over + /// * `ignore_tags` - List of tag names to ignore when loading clusters + /// + /// # Returns + /// + /// * `ClusterFeed` - Feed configured to run once through the provided clusters + #[allow(dead_code)] + pub fn load_from_clusters(clusters: Vec, ignore_tags: &[String]) -> Self { + // Filter out ignored tags from clusters + let filtered_clusters: Vec = clusters + .into_iter() + .filter_map(|cluster| match cluster { + // For ComposedKey, remove the entire cluster if its tag is ignored + Cluster::ComposedKey(key) => { + if ignore_tags.contains(&key.tag.name) { + None + } else { + Some(Cluster::ComposedKey(key)) + } + } + // For TagsKey, filter out ignored tags from the list + Cluster::TagsKey(facility_id, tags) => { + let filtered_tags: Vec = tags + .into_iter() + .filter(|tag| !ignore_tags.contains(&tag.name)) + .collect(); + // Only keep the cluster if it still has tags after filtering + if filtered_tags.is_empty() { + None + } else { + Some(Cluster::TagsKey(facility_id, filtered_tags)) + } + } + }) + .collect(); + + ClusterFeed { + clusters: Arc::new(RwLock::new(filtered_clusters)), + current_index: Arc::new(AtomicUsize::new(0)), + stop_flag: Arc::new(AtomicBool::new(false)), + sleep_map: Arc::new(Mutex::new(HashMap::new())), + } + } + + /// Streams clusters to a channel receiver with backpressure control. + /// + /// Creates a producer-consumer pattern where clusters are sent through a channel + /// to the provided sender. The stream can be controlled via the returned message + /// channel (for sleep/stop commands). 
+ /// + /// # Arguments + /// + /// * `sender` - Channel sender for emitting clusters + /// + /// # Returns + /// + /// * `mpsc::Sender` - Control channel for sending sleep/stop messages + /// + /// # Behavior + /// + /// - Iterates through clusters in round-robin fashion + /// - Skips sleeping clusters until their wake time expires + /// - Applies backoff delays between rounds (varies based on sleeping cluster count) + /// - Stops when receiving a Stop message or when configured empty cycles limit is reached + /// - Automatically cleans up expired sleep entries + pub async fn stream(self, sender: mpsc::Sender) -> mpsc::Sender { + // Use a small channel to ensure the producer waits for items to be consumed before + // generating more + let (cancel_sender, mut feed_receiver) = mpsc::channel(8); + + let stop_flag = self.stop_flag.clone(); + let sleep_map = self.sleep_map.clone(); + + // Stream clusters on the caller channel + tokio::spawn(async move { + let mut all_sleeping_rounds = 0; + let feed = self.clusters.clone(); + let current_index_atomic = self.current_index.clone(); + + loop { + // Check stop flag + if stop_flag.load(Ordering::Relaxed) { + warn!("Cluster received a stop message. 
Stopping feed."); + break; + } + + let (item, cluster_size, completed_round) = { + let clusters = feed.read().unwrap_or_else(|poisoned| poisoned.into_inner()); + if clusters.is_empty() { + break; + } + + let current_index = current_index_atomic.load(Ordering::Relaxed); + let item = clusters[current_index].clone(); + let next_index = (current_index + 1) % clusters.len(); + let completed_round = next_index == 0; // Detect wrap-around + current_index_atomic.store(next_index, Ordering::Relaxed); + + (item, clusters.len(), completed_round) + }; + + // Skip cluster if it is marked as sleeping + let is_sleeping = { + let mut sleep_map_lock = sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + if let Some(wake_up_time) = sleep_map_lock.get(&item) { + if *wake_up_time > SystemTime::now() { + // Still sleeping, skip it + true + } else { + // Remove expired entries + sleep_map_lock.remove(&item); + false + } + } else { + false + } + }; + + if !is_sleeping && sender.send(item).await.is_err() { + warn!("Cluster receiver dropped. 
Stopping feed."); + break; + } + + // At end of round, add backoff sleep + if completed_round { + CLUSTER_ROUNDS.fetch_add(1, Ordering::Relaxed); + + // Check if all/most clusters are sleeping + let sleeping_count = { + let sleep_map_lock = sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + sleep_map_lock.len() + }; + if sleeping_count >= cluster_size { + // Ensure this doesn't loop forever when there's a limit configured + all_sleeping_rounds += 1; + if let Some(max_empty_cycles) = CONFIG.queue.empty_job_cycles_before_quiting + { + if all_sleeping_rounds > max_empty_cycles { + warn!("All clusters have been sleeping for too long"); + break; + } + } + + // All clusters sleeping, sleep longer + tokio::time::sleep(Duration::from_secs(5)).await; + } else if sleeping_count > 0 { + // Some clusters sleeping, brief pause + tokio::time::sleep(Duration::from_millis(100)).await; + } else { + // Active work, minimal pause + tokio::time::sleep(Duration::from_millis(10)).await; + } + } + } + }); + + // Process messages on the receiving end + let sleep_map = self.sleep_map.clone(); + tokio::spawn(async move { + while let Some(message) = feed_receiver.recv().await { + match message { + FeedMessage::Sleep(cluster, duration) => { + if let Some(wake_up_time) = SystemTime::now().checked_add(duration) { + debug!("{:?} put to sleep for {}s", cluster, duration.as_secs()); + { + let mut sleep_map_lock = + sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + sleep_map_lock.insert(cluster, wake_up_time); + } + } else { + warn!( + "Sleep request ignored for {:?}. Invalid duration={}s", + cluster, + duration.as_secs() + ); + } + } + FeedMessage::Stop() => { + self.stop_flag.store(true, Ordering::Relaxed); + break; + } + } + } + }); + + cancel_sender + } +} + +/// Looks up a facility ID by facility name. 
+/// +/// # Arguments +/// +/// * `facility_name` - The name of the facility +/// +/// # Returns +/// +/// * `Ok(Uuid)` - The facility ID +/// * `Err(miette::Error)` - If facility not found or database error +pub async fn get_facility_id(facility_name: &str) -> Result { + let cluster_dao = ClusterDao::new().await?; + cluster_dao + .get_facility_id(facility_name) + .await + .into_diagnostic() +} + +/// Looks up a show ID by show name. +/// +/// # Arguments +/// +/// * `show_name` - The name of the show +/// +/// # Returns +/// +/// * `Ok(Uuid)` - The show ID +/// * `Err(miette::Error)` - If show not found or database error +pub async fn get_show_id(show_name: &str) -> Result { + let cluster_dao = ClusterDao::new().await?; + cluster_dao.get_show_id(show_name).await.into_diagnostic() +} diff --git a/rust/crates/scheduler/src/cluster_key.rs b/rust/crates/scheduler/src/cluster_key.rs new file mode 100644 index 000000000..c20e75d3c --- /dev/null +++ b/rust/crates/scheduler/src/cluster_key.rs @@ -0,0 +1,54 @@ +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq)] +pub enum TagType { + Alloc, + HostName, + Manual, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq)] +pub struct Tag { + pub name: String, + pub ttype: TagType, +} + +impl std::ops::Deref for Tag { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.name + } +} + +impl std::fmt::Display for Tag { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name) + } +} + +impl AsRef for Tag { + fn as_ref(&self) -> &str { + &self.name + } +} + +impl std::borrow::Borrow for Tag { + fn borrow(&self) -> &str { + &self.name + } +} + +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq)] +pub struct ClusterKey { + pub facility_id: Uuid, + pub show_id: Uuid, + pub tag: Tag, +} + +impl std::fmt::Display for ClusterKey { + fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}:{}", self.facility_id, self.show_id, self.tag) + } +} diff --git a/rust/crates/scheduler/src/config/error.rs b/rust/crates/scheduler/src/config/error.rs new file mode 100644 index 000000000..f9f57c03c --- /dev/null +++ b/rust/crates/scheduler/src/config/error.rs @@ -0,0 +1,32 @@ +use miette::Diagnostic; +use thiserror::Error; +use tonic::Status; + +//===Scheduler Config Error=== +#[derive(Debug, Error, Diagnostic)] +pub enum JobQueueConfigError { + #[error("Failed to load config file")] + LoadConfigError(String), + + #[error("Failed to start application via config file")] + StartFromConfigError(String), + + #[error("Invalid Path configuration")] + InvalidPath(String), +} + +impl From for Status { + fn from(e: JobQueueConfigError) -> Self { + match e { + JobQueueConfigError::LoadConfigError(msg) => { + Status::invalid_argument(format!("Failed to load config: {}", msg)) + } + JobQueueConfigError::StartFromConfigError(msg) => { + Status::internal(format!("Failed to start: {}", msg)) + } + JobQueueConfigError::InvalidPath(msg) => { + Status::invalid_argument(format!("Invalid path: {}", msg)) + } + } + } +} diff --git a/rust/crates/scheduler/src/config/mod.rs b/rust/crates/scheduler/src/config/mod.rs new file mode 100644 index 000000000..22eb6ac1f --- /dev/null +++ b/rust/crates/scheduler/src/config/mod.rs @@ -0,0 +1,368 @@ +pub mod error; + +use crate::config::error::JobQueueConfigError; +use bytesize::ByteSize; +use config::{Config as ConfigBase, Environment, File}; +use lazy_static::lazy_static; +use once_cell::sync::OnceCell; +use serde::Deserialize; +use std::{env, fs, path::PathBuf, time::Duration}; + +static DEFAULT_CONFIG_FILE: &str = "~/.local/share/rqd.yaml"; + +pub static OVERRIDE_CONFIG: OnceCell = OnceCell::new(); + +lazy_static! 
{ + pub static ref CONFIG: Config = OVERRIDE_CONFIG + .get() + .cloned() + .unwrap_or_else(|| Config::load().expect("Failed to load config file")); +} + +//===Config Types=== + +#[derive(Debug, Deserialize, Default, Clone)] +#[serde(default)] +pub struct Config { + pub logging: LoggingConfig, + pub queue: QueueConfig, + pub database: DatabaseConfig, + pub rqd: RqdConfig, + pub host_cache: HostCacheConfig, + pub scheduler: SchedulerConfig, +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct LoggingConfig { + // Logging level: debug|info|warning|error + pub level: String, + // Path to the log file if `file_appender` is enabled + pub path: String, + // Log to stdout if file_appender is False + pub file_appender: bool, +} + +impl Default for LoggingConfig { + fn default() -> Self { + Self { + level: "debug:sqlx=info".to_string(), + path: "/opt/rqd/logs/scheduler.log".to_string(), + file_appender: false, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct QueueConfig { + #[serde(with = "humantime_serde")] + pub monitor_interval: Duration, + pub worker_threads: usize, + pub dispatch_frames_per_layer_limit: usize, + pub core_multiplier: u32, + pub memory_stranded_threshold: ByteSize, + #[serde(with = "humantime_serde")] + pub job_back_off_duration: Duration, + pub stream: StreamConfig, + pub manual_tags_chunk_size: usize, + pub hostname_tags_chunk_size: usize, + pub host_candidate_attemps_per_layer: usize, + pub empty_job_cycles_before_quiting: Option, + pub mem_reserved_min: ByteSize, + #[serde(with = "humantime_serde")] + pub allocation_refresh_interval: Duration, + pub selfish_services: Vec, + pub host_booking_strategy: HostBookingStrategy, + pub frame_memory_soft_limit: f64, + pub frame_memory_hard_limit: f64, +} + +impl Default for QueueConfig { + fn default() -> QueueConfig { + QueueConfig { + monitor_interval: Duration::from_secs(5), + worker_threads: 4, + dispatch_frames_per_layer_limit: 20, + core_multiplier: 
100, + memory_stranded_threshold: ByteSize::gib(2), + job_back_off_duration: Duration::from_secs(300), + stream: StreamConfig::default(), + manual_tags_chunk_size: 100, + hostname_tags_chunk_size: 300, + host_candidate_attemps_per_layer: 10, + empty_job_cycles_before_quiting: None, + mem_reserved_min: ByteSize::mib(250), + allocation_refresh_interval: Duration::from_secs(3), + selfish_services: Vec::new(), + host_booking_strategy: HostBookingStrategy::default(), + frame_memory_soft_limit: 1.6, + frame_memory_hard_limit: 2.0, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct StreamConfig { + pub cluster_buffer_size: usize, + pub job_buffer_size: usize, +} + +impl Default for StreamConfig { + fn default() -> Self { + Self { + cluster_buffer_size: 3, + job_buffer_size: 3, + } + } +} + +#[derive(Debug, Deserialize, Clone, Copy)] +#[serde(default)] +pub struct HostBookingStrategy { + pub core_saturation: bool, + pub memory_saturation: bool, +} + +impl Default for HostBookingStrategy { + fn default() -> Self { + Self { + core_saturation: true, + memory_saturation: false, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct DatabaseConfig { + pub pool_size: u32, + pub db_host: String, + pub db_name: String, + pub db_user: String, + pub db_pass: String, + pub db_port: u16, + pub core_multiplier: u32, +} + +impl Default for DatabaseConfig { + fn default() -> DatabaseConfig { + DatabaseConfig { + pool_size: 20, + core_multiplier: 100, + db_host: "localhost".to_string(), + db_name: "test".to_string(), + db_user: "postgres".to_string(), + db_pass: "password".to_string(), + db_port: 5432, + } + } +} + +impl DatabaseConfig { + pub fn connection_url(&self) -> String { + let encoded_user = urlencoding::encode(&self.db_user); + let encoded_pass = urlencoding::encode(&self.db_pass); + format!( + "postgresql://{}:{}@{}:{}/{}?options=-c%20timezone%3DUTC", + encoded_user, encoded_pass, self.db_host, self.db_port, self.db_name 
+ ) + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct TopicConfig { + pub topic_name: String, + pub num_partitions: i32, + pub replication_factor: i32, + #[serde(with = "humantime_serde")] + pub retention: Duration, +} + +impl Default for TopicConfig { + fn default() -> TopicConfig { + TopicConfig { + topic_name: "general_job_queue".to_string(), + num_partitions: 12, + replication_factor: 3, + retention: Duration::from_secs(300), + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct RqdConfig { + pub grpc_port: u32, + pub dry_run_mode: bool, +} + +impl Default for RqdConfig { + fn default() -> RqdConfig { + RqdConfig { + grpc_port: 8444, + dry_run_mode: false, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct HostCacheConfig { + pub concurrent_groups: usize, + pub memory_key_divisor: ByteSize, + #[serde(with = "humantime_serde")] + pub checkout_timeout: Duration, + #[serde(with = "humantime_serde")] + pub monitoring_interval: Duration, + #[serde(with = "humantime_serde")] + pub clean_up_interval: Duration, + #[serde(with = "humantime_serde")] + pub group_idle_timeout: Duration, + pub concurrent_fetch_permit: usize, + #[serde(with = "humantime_serde")] + pub host_staleness_threshold: Duration, + pub update_stat_on_book: bool, +} + +impl Default for HostCacheConfig { + fn default() -> HostCacheConfig { + HostCacheConfig { + concurrent_groups: 3, + memory_key_divisor: ByteSize::gib(2), + checkout_timeout: Duration::from_secs(12), + monitoring_interval: Duration::from_secs(10), + clean_up_interval: Duration::from_secs(5 * 60), + group_idle_timeout: Duration::from_secs(3 * 60 * 60), + concurrent_fetch_permit: 4, + host_staleness_threshold: Duration::from_secs(2 * 60), // 2 minutes + update_stat_on_book: false, + } + } +} + +#[derive(Debug, Deserialize, Clone, Default)] +#[serde(default)] +pub struct SchedulerConfig { + pub facility: Option, + pub alloc_tags: Vec, + pub manual_tags: 
Vec, + pub ignore_tags: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct AllocTag { + pub show: String, + pub tag: String, +} + +//===Config Loader=== + +impl Config { + /// Loads the current configuration from system config file and environment variables. + /// + /// Configuration sources are applied in the following order (later sources override earlier): + /// 1. Default config file: `~/.local/share/rqd.yaml` + /// 2. Custom config file: specified via `OPENCUE_SCHEDULER_CONFIG` environment variable + /// 3. Environment variables: prefixed with `OPENSCHEDULER_`, using `_` as separator + /// + /// # Returns + /// + /// * `Ok(Config)` - Successfully loaded configuration + /// * `Err(JobQueueConfigError)` - Failed to load or deserialize configuration + pub fn load() -> Result { + let mut required = false; + let config_file = match env::var("OPENCUE_SCHEDULER_CONFIG") { + Ok(v) => { + println!( + " INFO Config: {}", + fs::canonicalize(&v) + .unwrap_or(PathBuf::from("Invalid path")) + .to_string_lossy() + ); + required = true; + v + } + Err(_) => DEFAULT_CONFIG_FILE.to_string(), + }; + + println!(" INFO Config::load: using config file: {:?}", config_file); + + let config = ConfigBase::builder() + .add_source(File::with_name(&config_file).required(required)) + .add_source(Environment::with_prefix("OPENSCHEDULER")) + .build() + .map_err(|err| { + JobQueueConfigError::LoadConfigError(format!( + "{:?} config could not be loaded. {}", + &config_file, err + )) + })?; + + let deserialized_config = Config::deserialize(config).map_err(|err| { + JobQueueConfigError::LoadConfigError(format!( + "{:?} config could not be deserialized. {}", + &config_file, err + )) + })?; + + Ok(deserialized_config) + } + + /// Loads configuration from a specified file path with environment variable overrides. 
+ /// + /// # Arguments + /// + /// * `path` - Path to the configuration file + /// + /// # Returns + /// + /// * `Ok(Config)` - Successfully loaded configuration + /// * `Err(JobQueueConfigError)` - Failed to load or deserialize configuration + #[allow(dead_code)] + pub fn load_file_and_env>(path: P) -> Result { + let config = ConfigBase::builder() + .add_source(File::with_name(path.as_ref())) + .add_source(Environment::with_prefix("RQD").separator("_")) + .build(); + + config + .map(|c| Config::deserialize(c).unwrap()) + .map_err(|err| { + JobQueueConfigError::LoadConfigError(format!( + "{:?} config could not be loaded. {}", + path.as_ref(), + err + )) + }) + } + + /// Loads configuration from a specified file path without environment variable overrides. + /// + /// # Arguments + /// + /// * `path` - Path to the configuration file + /// + /// # Returns + /// + /// * `Ok(Config)` - Successfully loaded configuration + /// * `Err(JobQueueConfigError)` - Failed to load or deserialize configuration + #[allow(dead_code)] + pub fn load_file>(path: P) -> Result { + let config = ConfigBase::builder() + .add_source(File::with_name(path.as_ref())) + .build(); + + config + .map(|c| Config::deserialize(c).unwrap()) + .map_err(|err| { + JobQueueConfigError::LoadConfigError(format!( + "{:?} config could not be loaded. {}", + path.as_ref(), + err + )) + }) + } +} diff --git a/rust/crates/scheduler/src/dao/allocation_dao.rs b/rust/crates/scheduler/src/dao/allocation_dao.rs new file mode 100644 index 000000000..7a0563711 --- /dev/null +++ b/rust/crates/scheduler/src/dao/allocation_dao.rs @@ -0,0 +1,231 @@ +use miette::{Context, IntoDiagnostic, Result}; +use sqlx::{Pool, Postgres}; +use std::sync::Arc; +use std::{cmp, collections::HashMap}; +use uuid::Uuid; + +use crate::{ + dao::helpers::parse_uuid, + models::{Allocation, CoreSize, Subscription}, + pgpool::connection_pool, +}; + +pub type ShowId = Uuid; +pub type AllocationName = String; + +/// Database model for a subscription. 
+/// +/// Maps directly to the database schema with raw column names. +/// Should be converted to `Subscription` for business logic use. +#[derive(sqlx::FromRow)] +pub struct SubscriptionModel { + pub pk_subscription: String, + pub pk_alloc: String, + pub str_alloc_name: String, + pub pk_show: String, + pub int_size: i64, + pub int_burst: i64, + pub int_cores: i32, + pub int_gpus: i32, +} + +impl From for Subscription { + fn from(val: SubscriptionModel) -> Self { + // There was a condition on cuebot in the past that would allow negative core values on + // the subscription table. If bookedcores is negative, use 0. + let booked_cores = cmp::max(val.int_cores, 0); + + Subscription { + id: parse_uuid(&val.pk_subscription), + allocation_id: parse_uuid(&val.pk_alloc), + allocation_name: val.str_alloc_name, + show_id: parse_uuid(&val.pk_show), + size: val.int_size, + burst: CoreSize::from_multiplied( + val.int_burst.try_into().expect("int_burst should fit i32"), + ), + booked_cores: CoreSize::from_multiplied(booked_cores), + gpus: val.int_gpus.try_into().expect("int_gpus should fit in u32"), + } + } +} + +/// Database model for an allocation. +/// +/// Maps directly to the database schema with raw column names. +/// Should be converted to `Allocation` for business logic use. +#[derive(sqlx::FromRow)] +pub struct AllocationModel { + pub pk_alloc: String, + pub str_name: String, + pub b_allow_edit: bool, + pub b_default: bool, + pub str_tag: Option, + pub b_billable: bool, + pub pk_facility: String, + pub b_enabled: Option, +} + +impl From for Allocation { + fn from(val: AllocationModel) -> Self { + Allocation { + id: parse_uuid(&val.pk_alloc), + name: val.str_name, + allow_edit: val.b_allow_edit, + is_default: val.b_default, + tag: val.str_tag, + billable: val.b_billable, + facility_id: parse_uuid(&val.pk_facility), + enabled: val.b_enabled.unwrap_or(true), + } + } +} + +/// Data Access Object for allocation and subscription operations. 
+/// +/// An allocation represents a pool of compute resources within a facility that can be +/// assigned to shows through subscriptions. This DAO provides methods to query allocation +/// and subscription data from the database. +/// +/// # Purpose +/// +/// The `AllocationDao` is responsible for: +/// - Retrieving subscription information organized by show +/// - Querying allocation details and their associated subscriptions +/// - Supporting resource allocation and capacity planning queries +pub struct AllocationDao { + /// Shared connection pool for database operations. + #[allow(dead_code)] + connection_pool: Arc>, +} + +/// SQL query to retrieve all subscriptions with their complete data. +/// +/// Returns all columns from the subscription table, which will be used to +/// organize subscriptions by show_id for efficient lookup. +/// +/// # Returns +/// +/// All subscription records with columns: +/// - pk_subscription, pk_alloc, pk_show, int_size, int_burst, +/// int_cores, float_tier, int_gpus +static SELECT_ALL_SUBSCRIPTIONS: &str = r#" + SELECT + s.pk_subscription, + s.pk_alloc, + a.str_name as str_alloc_name, + s.pk_show, + s.int_size, + s.int_burst, + s.int_cores, + s.float_tier, + s.int_gpus + FROM subscription s + JOIN alloc a ON s.pk_alloc = a.pk_alloc + ORDER BY pk_show, pk_alloc +"#; + +impl AllocationDao { + /// Creates a new `AllocationDao` instance with a connection pool. + /// + /// This constructor initializes the DAO by obtaining a shared database connection pool. + /// The connection pool is reused across all operations for efficiency. + /// + /// # Returns + /// + /// Returns `Ok(AllocationDao)` on success, or an error if the connection pool cannot be obtained. 
+ /// + /// # Errors + /// + /// This function will return an error if: + /// - The database connection pool cannot be created + /// - Database connection parameters are invalid + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + Ok(AllocationDao { + connection_pool: pool, + }) + } + + /// Retrieves all subscriptions organized by show_id and allocation_name. + /// + /// This method fetches all subscription records from the database and organizes them + /// into a nested HashMap structure. The outer HashMap is keyed by show_id (pk_show), + /// and each value is an inner HashMap keyed by allocation_name (alloc.str_name) containing + /// the Subscription object. This structure enables efficient lookup of specific + /// subscriptions by both show and allocation during scheduling operations. + /// + /// # Returns + /// + /// Returns `Ok(HashMap>)` where: + /// - Outer Key: `pk_show` - The show identifier + /// - Inner Key: `pk_alloc` - The allocation identifier + /// - Inner Value: `Subscription` object containing all subscription data + /// + /// # Errors + /// + /// This function will return an error if: + /// - The SQL query execution fails (e.g., connection issues) + /// - The database schema doesn't match the expected structure + /// - Data type conversions fail + pub async fn get_subscriptions_by_show( + &self, + ) -> Result>> { + // Fetch all subscriptions from the database as models + let subscription_models: Vec = sqlx::query_as(SELECT_ALL_SUBSCRIPTIONS) + .fetch_all(self.connection_pool.as_ref()) + .await + .into_diagnostic() + .wrap_err("Failed to fetch subscriptions")?; + + // Organize subscriptions by show_id and allocation_name, converting models to business objects + let mut subscriptions_by_show: HashMap> = + HashMap::new(); + + for subs_model in subscription_models { + let show_id = parse_uuid(&subs_model.pk_show); + let allocation_name = subs_model.str_alloc_name.clone(); + let subscription = 
Subscription::from(subs_model); + subscriptions_by_show + .entry(show_id) + .or_default() + .insert(allocation_name, subscription); + } + + Ok(subscriptions_by_show) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + #[ignore] // Requires database setup + async fn test_get_subscriptions_by_show() -> Result<()> { + let dao = AllocationDao::new().await?; + let subscriptions = dao.get_subscriptions_by_show().await?; + + // Verify the structure is correct + for (show_id, allocation_map) in subscriptions.iter() { + assert!( + !allocation_map.is_empty(), + "Should have at least one subscription" + ); + + for (allocation_name, subscription) in allocation_map { + assert_eq!( + &subscription.show_id, show_id, + "Subscription show_id should match the outer HashMap key" + ); + assert_eq!( + &subscription.allocation_id.to_string(), + allocation_name, + "Subscription allocation_id should match the inner HashMap key" + ); + } + } + + Ok(()) + } +} diff --git a/rust/crates/scheduler/src/dao/cluster_dao.rs b/rust/crates/scheduler/src/dao/cluster_dao.rs new file mode 100644 index 000000000..c98589a3e --- /dev/null +++ b/rust/crates/scheduler/src/dao/cluster_dao.rs @@ -0,0 +1,152 @@ +use std::sync::Arc; + +use futures::Stream; +use miette::{IntoDiagnostic, Result}; +use serde::{Deserialize, Serialize}; +use sqlx::{Pool, Postgres}; +use uuid::Uuid; + +use crate::{dao::helpers::parse_uuid, pgpool::connection_pool}; + +/// Data Access Object for host operations in the job dispatch system. +/// +/// Manages database operations related to render hosts, including: +/// - Finding suitable hosts for layer dispatch +/// - Host resource locking and unlocking +/// - Updating host resource availability after dispatch +pub struct ClusterDao { + connection_pool: Arc>, +} + +/// Database model representing a host with its current resource availability. +/// +/// Contains host metadata, resource information, and allocation details +/// needed for dispatch matching. 
This model is converted to the business +/// logic `Host` type for processing. +#[derive(sqlx::FromRow, Serialize, Deserialize)] +pub struct ClusterModel { + pub tag: String, + pub show_id: String, + pub facility_id: String, + pub ttype: String, +} + +static QUERY_ALLOC_CLUSTERS: &str = r#" +SELECT DISTINCT + a.str_tag as tag, + sh.pk_show as show_id, + a.pk_facility as facility_id, + 'ALLOC' as ttype +FROM host_tag + JOIN alloc a ON a.str_tag = host_tag.str_tag + JOIN subscription sub ON sub.pk_alloc = a.pk_alloc + JOIN show sh ON sub.pk_show = sh.pk_show +WHERE str_tag_type = 'ALLOC' + AND sh.b_active = true +"#; + +static QUERY_NON_ALLOC_CLUSTERS: &str = r#" +SELECT DISTINCT + host_tag.str_tag as tag, + s.pk_show as show_id, + a.pk_facility as facility_id, + str_tag_type as ttype +FROM host_tag +JOIN host h on h.pk_host = host_tag.pk_host +JOIN alloc a ON a.pk_alloc = h.pk_alloc +JOIN subscription s ON a.pk_alloc = s.pk_alloc +WHERE str_tag_type <> 'ALLOC' +"#; + +static QUERY_FACILITY_ID: &str = r#" +SELECT pk_facility +FROM facility +WHERE str_name = $1 +"#; + +static QUERY_SHOW_ID: &str = r#" +SELECT pk_show +FROM show +WHERE str_name = $1 +"#; + +impl ClusterDao { + /// Creates a new HostDao from database configuration. + /// + /// Establishes a connection pool to the PostgreSQL database for + /// host-related operations. + /// + /// # Returns + /// * `Ok(HostDao)` - Configured DAO ready for host operations + /// * `Err(miette::Error)` - If database connection fails + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + Ok(ClusterDao { + connection_pool: pool, + }) + } + + /// Fetches all allocation-based clusters from the database. + /// + /// Returns clusters defined by facility, show, and allocation tag combinations. + /// Only includes active shows with host tags. 
+ /// + /// # Returns + /// + /// * `Stream>` - Stream of allocation clusters + pub fn fetch_alloc_clusters( + &self, + ) -> impl Stream> + '_ { + sqlx::query_as::<_, ClusterModel>(QUERY_ALLOC_CLUSTERS).fetch(&*self.connection_pool) + } + + /// Fetches all non-allocation clusters (MANUAL and HOSTNAME tags). + /// + /// Returns clusters defined by manual or hostname-based tags that are not + /// tied to specific facility/show allocations. + /// + /// # Returns + /// + /// * `Stream>` - Stream of non-allocation clusters + pub fn fetch_non_alloc_clusters( + &self, + ) -> impl Stream> + '_ { + sqlx::query_as::<_, ClusterModel>(QUERY_NON_ALLOC_CLUSTERS).fetch(&*self.connection_pool) + } + + /// Looks up a facility ID by facility name. + /// + /// # Arguments + /// + /// * `facility_name` - The name of the facility + /// + /// # Returns + /// + /// * `Ok(Uuid)` - The facility ID + /// * `Err(sqlx::Error)` - If facility not found or database error + pub async fn get_facility_id(&self, facility_name: &str) -> Result { + let row: (String,) = sqlx::query_as(QUERY_FACILITY_ID) + .bind(facility_name) + .fetch_one(&*self.connection_pool) + .await?; + Ok(parse_uuid(&row.0)) + } + + /// Looks up a show ID by show name. 
+ /// + /// # Arguments + /// + /// * `show_name` - The name of the show + /// + /// # Returns + /// + /// * `Ok(Uuid)` - The show ID + /// * `Err(sqlx::Error)` - If show not found or database error + pub async fn get_show_id(&self, show_name: &str) -> Result { + let row: (String,) = sqlx::query_as(QUERY_SHOW_ID) + .bind(show_name) + .fetch_one(&*self.connection_pool) + .await?; + Ok(parse_uuid(&row.0)) + } +} diff --git a/rust/crates/scheduler/src/dao/frame_dao.rs b/rust/crates/scheduler/src/dao/frame_dao.rs new file mode 100644 index 000000000..c5f324825 --- /dev/null +++ b/rust/crates/scheduler/src/dao/frame_dao.rs @@ -0,0 +1,239 @@ +use std::time::SystemTime; + +use bytesize::{ByteSize, KB}; +use chrono::{DateTime, Utc}; +use miette::{Diagnostic, Result}; +use opencue_proto::job::FrameExitStatus; +use sqlx::{Postgres, Transaction}; +use thiserror::Error; + +use crate::{ + config::CONFIG, + dao::helpers::parse_uuid, + models::{CoreSize, DispatchFrame, VirtualProc}, +}; + +/// Data Access Object for frame operations in the job dispatch system. +/// +/// Handles database queries related to frames, particularly for finding +/// dispatchable frames within layers that meet resource constraints. +pub struct FrameDao {} + +/// Database model representing a frame ready for dispatch. +/// +/// Contains all the necessary information to dispatch a frame to a host, +/// including resource requirements, job metadata, and execution parameters. +/// This model maps directly to the database query results and is converted +/// to `DispatchFrame` for business logic processing. 
+#[derive(sqlx::FromRow)] +pub struct DispatchFrameModel { + // Entity fields + pub pk_frame: String, + pub str_frame_name: String, + + // LayerEntity fields + pub pk_show: String, + pub pk_facility: String, + pub pk_job: String, + + // FrameEntity fields + pub pk_layer: String, + + // DispatchFrame specific fields + pub str_cmd: String, + pub str_range: String, + pub int_chunk_size: i64, + pub str_show: String, + pub str_shot: String, + pub str_user: String, + pub int_uid: Option, + pub str_log_dir: String, + pub str_layer_name: String, + pub str_job_name: String, + pub int_min_cores: i32, + pub int_mem_min: i64, + pub b_threadable: bool, + pub int_gpus_min: i64, + pub int_gpu_mem_min: i64, + // On Cuebot these fields come from constants, maybe replicate these constants here + // pub int_soft_memory_limit: i64, + // pub int_hard_memory_limit: i64, + pub str_services: Option, + pub str_os: Option, + pub int_layer_cores_max: i32, + pub int_version: i32, + pub str_loki_url: Option, + pub ts_updated: Option>, +} + +impl From for DispatchFrame { + fn from(val: DispatchFrameModel) -> Self { + // Little closure to match a frames' list of services to the hardcode list of + // selfish services. + // + // TODO: The definitive solution for selfish services will require changes to the db, + // which at this moment would greatly impact the ability to deploy this scheduler on + // an active render farm. 
For now, read the list of servives from the config file, + // similar to Cuebot's approach + let has_selfish_service = |services: Vec| { + CONFIG + .queue + .selfish_services + .iter() + .any(|item| services.contains(item)) + }; + // Convert to SystemTime + let updated_at = match val.ts_updated { + Some(t) => SystemTime::from(t), + None => SystemTime::now(), + }; + + DispatchFrame { + id: parse_uuid(&val.pk_frame), + frame_name: val.str_frame_name, + show_id: parse_uuid(&val.pk_show), + facility_id: parse_uuid(&val.pk_facility), + job_id: parse_uuid(&val.pk_job), + layer_id: parse_uuid(&val.pk_layer), + command: val.str_cmd, + range: val.str_range, + chunk_size: val + .int_chunk_size + .try_into() + .expect("int_chunk_size fit on a i32"), + show_name: val.str_show, + shot: val.str_shot, + user: val.str_user, + uid: val + .int_uid + .map(|uid| uid.try_into().expect("int_uid should fit on a i32")), + log_dir: val.str_log_dir, + layer_name: val.str_layer_name, + job_name: val.str_job_name, + min_cores: CoreSize::from_multiplied(val.int_min_cores), + threadable: val.b_threadable, + min_gpus: val + .int_gpus_min + .try_into() + .expect("int_gpus_min should fit on a i32"), + min_gpu_memory: ByteSize::kb(val.int_gpu_mem_min as u64), + min_memory: ByteSize::kb(val.int_mem_min as u64), + services: val.str_services.clone(), + os: val.str_os, + loki_url: val.str_loki_url, + layer_cores_limit: (val.int_layer_cores_max > 0) + .then(|| CoreSize::from_multiplied(val.int_layer_cores_max)), + has_selfish_service: has_selfish_service( + val.str_services + .map(|services| services.split(",").map(|v| v.to_string()).collect()) + .unwrap_or_default(), + ), + version: val.int_version as u32, + updated_at, + } + } +} + +static UPDATE_FRAME_STARTED: &str = r#" +UPDATE frame SET + str_state = 'RUNNING', + str_host = $1, + int_cores = $2, + int_mem_reserved = $3, + int_gpus = $4, + int_gpu_mem_reserved = $5, + ts_updated = current_timestamp, + ts_started = current_timestamp, + ts_stopped = 
null, + int_version = int_version + 1 +WHERE pk_frame = $6 + AND str_state = 'WAITING' + AND int_version = $7 +"#; + +static UPDATE_RETRY_COUNT: &str = r#" +UPDATE frame SET + int_retries = int_retries + 1 +WHERE pk_frame = $1 + AND int_exit_status != ALL($2) +"#; + +impl FrameDao { + /// Creates a new FrameDao instance. + /// + /// # Returns + /// + /// * `Ok(FrameDao)` - New DAO instance + /// * `Err(miette::Error)` - Initialization failed + pub async fn new() -> Result { + // This is only here to keep a similar interface with other DAO modules + Ok(FrameDao {}) + } + + /// Updates a frame's state to RUNNING and assigns it to a host. + /// + /// Atomically transitions a frame from WAITING to RUNNING state, recording + /// the host assignment and reserved resources. Uses optimistic locking via + /// version field to prevent race conditions. Also respects layer limits. + /// + /// # Arguments + /// + /// * `transaction` - Database transaction for atomic update + /// * `virtual_proc` - Virtual proc containing frame and host assignment details + /// + /// # Returns + /// + /// * `Ok(())` - Frame successfully started + /// * `Err(miette::Error)` - Database update failed or frame no longer available + pub async fn update_frame_started( + &self, + transaction: &mut Transaction<'_, Postgres>, + virtual_proc: &VirtualProc, + ) -> Result<(), FrameDaoError> { + let result = sqlx::query(UPDATE_FRAME_STARTED) + .bind(virtual_proc.host_name.clone()) + .bind(virtual_proc.cores_reserved.value()) + .bind((virtual_proc.memory_reserved.as_u64() / KB) as i32) + .bind(virtual_proc.gpus_reserved as i32) + .bind((virtual_proc.gpu_memory_reserved.as_u64() / KB) as i32) + .bind(virtual_proc.frame.id.to_string()) + .bind(virtual_proc.frame.version as i32) + .execute(&mut **transaction) + .await + .map_err(FrameDaoError::DbFailure)?; + + // Check if the update actually modified a row + if result.rows_affected() == 0 { + return Err(FrameDaoError::FrameCouldNotBeUpdated); + } + + // Update 
retry count for frames that have been previously executed + let non_retriable_codes = &[ + FrameExitStatus::SkipRetry as i32, + FrameExitStatus::FailedLaunch as i32, + // Values predefined at Cuebot on Dispatcher.java + 299, // EXIT_STATUS_FRAME_CLEARED + 301, // EXIT_STATUS_FRAME_ORPHAN + 302, // EXIT_STATUS_FAILED_KILL + 399, // EXIT_STATUS_DOWN_HOST + -1, // Not set (This will skip frames that have never ran) + ]; + let _ = sqlx::query(UPDATE_RETRY_COUNT) + .bind(virtual_proc.frame.id.to_string()) + .bind(non_retriable_codes) + .execute(&mut **transaction) + .await + .map_err(FrameDaoError::DbFailure)?; + + Ok(()) + } +} + +#[derive(Debug, Error, Diagnostic)] +pub enum FrameDaoError { + #[error("Failed to lock frame for update. Frame possibly changed before being dispatched")] + FrameCouldNotBeUpdated, + + #[error("Failed to execute query")] + DbFailure(sqlx::Error), +} diff --git a/rust/crates/scheduler/src/dao/helpers.rs b/rust/crates/scheduler/src/dao/helpers.rs new file mode 100644 index 000000000..68c910739 --- /dev/null +++ b/rust/crates/scheduler/src/dao/helpers.rs @@ -0,0 +1,25 @@ +use uuid::Uuid; + +/// Parses a UUID string with case-insensitive handling. +/// +/// The database stores UUIDs as character varying(36) which may contain +/// uppercase or lowercase hex digits. This function normalizes the case +/// before parsing. +/// +/// # Arguments +/// +/// * `uuid_str` - String representation of a UUID +/// +/// # Returns +/// +/// * `Uuid` - Parsed UUID +/// +/// # Panics +/// +/// Panics if the string is not a valid UUID format. This is intentional +/// as invalid UUIDs in the database represent a data integrity issue. 
+pub fn parse_uuid(uuid_str: &str) -> Uuid { + // Uuid::parse_str is case-insensitive by default, but let's be explicit + Uuid::parse_str(&uuid_str.to_lowercase()) + .unwrap_or_else(|_| panic!("Invalid UUID in database: {}", uuid_str)) +} diff --git a/rust/crates/scheduler/src/dao/host_dao.rs b/rust/crates/scheduler/src/dao/host_dao.rs new file mode 100644 index 000000000..969cb10a5 --- /dev/null +++ b/rust/crates/scheduler/src/dao/host_dao.rs @@ -0,0 +1,436 @@ +use std::sync::Arc; + +use bytesize::{ByteSize, KB}; +use chrono::{DateTime, Utc}; +use miette::{Context, IntoDiagnostic, Result}; +use opencue_proto::host::ThreadMode; +use sqlx::{Pool, Postgres, Transaction}; +use tracing::trace; +use uuid::Uuid; + +use crate::{ + config::CONFIG, + dao::helpers::parse_uuid, + models::{CoreSize, Host, VirtualProc}, + pgpool::connection_pool, +}; + +/// Data Access Object for host operations in the job dispatch system. +/// +/// Manages database operations related to render hosts, including: +/// - Finding suitable hosts for layer dispatch +/// - Host resource locking and unlocking +/// - Updating host resource availability after dispatch +pub struct HostDao { + connection_pool: Arc>, +} +/// Updated resource counts after a host resource update operation. +/// +/// Contains the remaining idle resources on a host after dispatch. +pub struct UpdatedHostResources { + pub cores_idle: i64, + pub mem_idle: i64, + pub gpus_idle: i64, + pub gpu_mem_idle: i64, + pub last_updated: DateTime, +} + +/// Database model representing a host with its current resource availability. +/// +/// Contains host metadata, resource information, and allocation details +/// needed for dispatch matching. This model is converted to the business +/// logic `Host` type for processing. 
+#[derive(sqlx::FromRow)] +pub struct HostModel { + pk_host: String, + str_name: String, + str_os: Option, + int_cores_idle: i64, + int_mem_free: i64, + int_gpus_idle: i64, + #[allow(dead_code)] + int_gpu_mem_free: i64, + int_cores: i64, + int_mem_total: i64, + int_thread_mode: i32, + pk_alloc: String, + // Name of the allocation the host is subscribed to for a given show + str_alloc_name: String, + // Number of cores available at the subscription of the show this host has been queried on + int_alloc_available_cores: i64, + ts_ping: DateTime, +} + +impl From for Host { + fn from(val: HostModel) -> Self { + Host { + id: parse_uuid(&val.pk_host), + name: val.str_name, + str_os: val.str_os, + idle_cores: CoreSize::from_multiplied( + val.int_cores_idle + .try_into() + .expect("int_cores_min/multiplier should fit on a i32"), + ), + idle_memory: ByteSize::kb(val.int_mem_free as u64), + idle_gpus: val + .int_gpus_idle + .try_into() + .expect("int_gpus should fit on a i32"), + idle_gpu_memory: ByteSize::kb(0), + total_cores: CoreSize::from_multiplied( + val.int_cores + .try_into() + .expect("total_cores should fit on a i32"), + ), + total_memory: ByteSize::kb(val.int_mem_total as u64), + thread_mode: ThreadMode::try_from(val.int_thread_mode).unwrap_or_default(), + alloc_available_cores: CoreSize::from_multiplied( + val.int_alloc_available_cores + .try_into() + .expect("alloc_available_cores should fit on a i32"), + ), + alloc_id: parse_uuid(&val.pk_alloc), + alloc_name: val.str_alloc_name, + last_updated: val.ts_ping, + } + } +} + +static _QUERY_DISPATCH_HOST: &str = r#" +SELECT + h.pk_host, + h.str_name, + hs.str_os, + h.int_cores_idle, + h.int_mem_idle, + h.int_gpus_idle, + h.int_gpu_mem_idle, + h.int_cores, + h.int_mem, + h.int_thread_mode, + s.int_burst - s.int_cores as int_alloc_available_cores, + a.pk_alloc, + a.str_name as str_alloc_name, + h.ts_last_updated +FROM host h + INNER JOIN host_stat hs ON h.pk_host = hs.pk_host + INNER JOIN alloc a ON h.pk_alloc = 
a.pk_alloc + INNER JOIN subscription s ON s.pk_alloc = a.pk_alloc AND s.pk_show = $1 +WHERE LOWER(a.pk_facility) = LOWER($2) + AND (hs.str_os ILIKE $3 OR hs.str_os = '' and $4 = '') -- review + AND h.str_lock_state = 'OPEN' + AND hs.str_state = 'UP' + AND h.int_cores_idle >= $5 + AND h.int_mem_idle >= $6 + AND string_to_array($7, ' | ') && string_to_array(h.str_tags, ' ') + AND h.int_gpus_idle >= $8 + AND h.int_gpu_mem_idle >= $9 +ORDER BY + -- Hosts with least resources available come first in an attempt to fully book them + h.int_cores_idle::float / h.int_cores, + h.int_mem_idle::float / h.int_mem +LIMIT $10 +"#; + +// Host memory, cores and gpu values are stored at host and host_stat tables and are updated +// by different flows: +// - memory and core fields on table host are only updated when booking procs (update_host_resources) +// - the table host_stat contains memory fields that are updated by cuebot on HostReportHandler +// +// In summary, use host_stat for most up to date memory stats +static QUERY_HOST_BY_SHOW_FACILITY_AND_TAG: &str = r#" +SELECT DISTINCT + h.pk_host, + h.str_name, + hs.str_os, + h.int_cores_idle, + hs.int_mem_free, + h.int_gpus_idle, + hs.int_gpu_mem_free, + h.int_cores, + hs.int_mem_total, + h.int_thread_mode, + s.int_burst - s.int_cores as int_alloc_available_cores, + a.pk_alloc, + a.str_name as str_alloc_name, + hs.ts_ping +FROM host h + INNER JOIN host_stat hs ON h.pk_host = hs.pk_host + INNER JOIN alloc a ON h.pk_alloc = a.pk_alloc + INNER JOIN subscription s ON s.pk_alloc = a.pk_alloc AND s.pk_show = $1 + INNER JOIN host_tag ht ON h.pk_host = ht.pk_host +WHERE LOWER(a.pk_facility) = LOWER($2) + AND h.str_lock_state = 'OPEN' + AND hs.str_state = 'UP' + AND ht.str_tag = $3 +"#; + +static UPDATE_HOST_RESOURCES: &str = r#" +UPDATE host +SET int_cores_idle = int_cores_idle - $1, + int_mem_idle = int_mem_idle - $2, + int_gpus_idle = int_gpus_idle - $3, + int_gpu_mem_idle = int_gpu_mem_idle - $4 +WHERE pk_host = $5 +RETURNING 
int_cores_idle, int_mem_idle, int_gpus_idle, int_gpu_mem_idle, NOW() +"#; + +// This update is meant for testing environments where rqd is not constantly reporting +// host reports to Cuebot to get host_stats properly updated. +static UPDATE_HOST_STAT: &str = r#" +UPDATE host_stat +SET int_mem_free = int_mem_free - $1, + int_gpu_mem_free = int_gpu_mem_free - $2 +WHERE pk_host = $3 +"#; + +static UPDATE_SUBSCRIPTION: &str = r#" +UPDATE subscription +SET int_cores = int_cores + $1, + int_gpus = int_gpus + $2 +WHERE pk_show = $3 + AND pk_alloc = $4 +"#; + +static UPDATE_LAYER_RESOURCE: &str = r#" +UPDATE layer_resource +SET int_cores = int_cores + $1, + int_gpus = int_gpus + $2 +WHERE pk_layer = $3 +"#; + +static UPDATE_JOB_RESOURCE: &str = r#" +UPDATE job_resource +SET int_cores = int_cores + $1, + int_gpus = int_gpus + $2 +WHERE pk_job = $3 +"#; + +static UPDATE_FOLDER_RESOURCE: &str = r#" +UPDATE folder_resource +SET int_cores = int_cores + $1, + int_gpus = int_gpus + $2 +WHERE pk_folder = (SELECT pk_folder FROM job WHERE pk_job = $3) +"#; + +static UPDATE_POINT: &str = r#" +UPDATE point +SET int_cores = int_cores + $1, + int_gpus = int_gpus + $2 +WHERE pk_dept = (SELECT pk_dept FROM job WHERE pk_job = $3) + AND pk_show = $4 +"#; + +impl HostDao { + /// Creates a new HostDao from database configuration. + /// + /// Establishes a connection pool to the PostgreSQL database for + /// host-related operations. + /// + /// # Arguments + /// * `config` - Database configuration containing connection parameters + /// + /// # Returns + /// * `Ok(HostDao)` - Configured DAO ready for host operations + /// * `Err(miette::Error)` - If database connection fails + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + Ok(HostDao { + connection_pool: pool, + }) + } + + /// Fetches hosts matching a specific show, facility, and tag. 
+ /// + /// Finds all open hosts that belong to allocations subscribed to the given show + /// and tagged with the specified tag. + /// + /// # Arguments + /// + /// * `show_id` - UUID of the show + /// * `facility_id` - UUID of the facility + /// * `tag` - Tag to match against host tags + /// + /// # Returns + /// + /// * `Ok(Vec)` - List of matching hosts + /// * `Err(sqlx::Error)` - Database query failed + pub async fn fetch_hosts_by_show_facility_tag<'a>( + &'a self, + show_id: Uuid, + facility_id: Uuid, + tag: &'a str, + ) -> Result, sqlx::Error> { + let out = sqlx::query_as::<_, HostModel>(QUERY_HOST_BY_SHOW_FACILITY_AND_TAG) + .bind(show_id.to_string()) + .bind(facility_id.to_string()) + .bind(tag) + .fetch_all(&*self.connection_pool) + .await; + + // TODO: Remove + // for h in out.as_ref().expect("?") { + // info!("CacheFetch: {} with {} cores", h.pk_host, h.int_cores_idle); + // } + out + } + + /// Acquires an advisory lock on a host to prevent concurrent dispatch. + /// + /// Uses PostgreSQL's advisory lock mechanism to ensure only one dispatcher + /// can modify a host's resources at a time. The lock is based on a hash + /// of the host ID string. + /// + /// # Arguments + /// * `host_id` - The UUID of the host to lock + /// + /// # Returns + /// * `Ok(true)` - Lock successfully acquired + /// * `Ok(false)` - Lock already held by another process + /// * `Err(miette::Error)` - Database operation failed + pub async fn lock( + &self, + transaction: &mut Transaction<'_, Postgres>, + host_id: &Uuid, + ) -> Result { + trace!("Locking {}", host_id); + sqlx::query_scalar::<_, bool>("SELECT pg_try_advisory_lock(hashtext($1))") + .bind(host_id.to_string()) + .fetch_one(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to acquire advisory lock") + } + + /// Releases an advisory lock on a host after dispatch completion. 
+ /// + /// Releases the PostgreSQL advisory lock that was acquired during + /// the dispatch process, allowing other dispatchers to access the host. + /// + /// # Arguments + /// * `host_id` - The UUID of the host to unlock + /// + /// # Returns + /// * `Ok(true)` - Lock successfully released + /// * `Ok(false)` - Lock was not held by this process + /// * `Err(miette::Error)` - Database operation failed + pub async fn unlock( + &self, + transaction: &mut Transaction<'_, Postgres>, + host_id: &Uuid, + ) -> Result { + trace!("Unlocking {}", host_id); + sqlx::query_scalar::<_, bool>("SELECT pg_advisory_unlock(hashtext($1))") + .bind(host_id.to_string()) + .fetch_one(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to release advisory lock") + } + + /// Updates a host's available resource counts after frame dispatch. + /// + /// Modifies the host's idle resource counters in the database to reflect + /// resources consumed by dispatched frames. This ensures accurate resource + /// tracking for subsequent dispatch decisions. 
+ /// + /// # Arguments + /// * `transaction` - Database transaction + /// * `host_id` - ID of the host to update + /// * `virtual_proc` - Virtual proc containing resource reservations + /// + /// # Returns + /// * `Ok(UpdatedHostResources)` - Updated idle resource counts after dispatch + /// * `Err(miette::Error)` - Database update failed + pub async fn update_resources( + &self, + transaction: &mut Transaction<'_, Postgres>, + host_id: &Uuid, + virtual_proc: &VirtualProc, + dispatch_id: Uuid, + ) -> Result { + let (cores_idle, mem_idle, gpus_idle, gpu_mem_idle, last_updated): ( + i64, + i64, + i64, + i64, + DateTime, + ) = sqlx::query_as(UPDATE_HOST_RESOURCES) + .bind(virtual_proc.cores_reserved.value()) + .bind((virtual_proc.memory_reserved.as_u64() / KB) as i64) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.gpu_memory_reserved.as_u64() as i64) + .bind(host_id.to_string()) + .fetch_one(&mut **transaction) + .await + .into_diagnostic() + .wrap_err(format!("({dispatch_id}) Failed to update host resources"))?; + + if CONFIG.host_cache.update_stat_on_book { + sqlx::query(UPDATE_HOST_STAT) + .bind((virtual_proc.memory_reserved.as_u64() / KB) as i64) + .bind(virtual_proc.gpu_memory_reserved.as_u64() as i64) + .bind(host_id.to_string()) + .execute(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to update host stat")?; + } + + sqlx::query(UPDATE_SUBSCRIPTION) + .bind(virtual_proc.cores_reserved.value()) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.show_id.to_string()) + .bind(virtual_proc.alloc_id.to_string()) + .execute(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to update subscription resources")?; + + sqlx::query(UPDATE_LAYER_RESOURCE) + .bind(virtual_proc.cores_reserved.value()) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.layer_id.to_string()) + .execute(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to update layer resources")?; + + 
sqlx::query(UPDATE_JOB_RESOURCE) + .bind(virtual_proc.cores_reserved.value()) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.job_id.to_string()) + .execute(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to update job resources")?; + + sqlx::query(UPDATE_FOLDER_RESOURCE) + .bind(virtual_proc.cores_reserved.value()) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.job_id.to_string()) + .execute(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to update folder resources")?; + + sqlx::query(UPDATE_POINT) + .bind(virtual_proc.cores_reserved.value()) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.job_id.to_string()) + .bind(virtual_proc.show_id.to_string()) + .execute(&mut **transaction) + .await + .into_diagnostic() + .wrap_err("Failed to update point resources")?; + + Ok(UpdatedHostResources { + cores_idle, + mem_idle, + gpus_idle, + gpu_mem_idle, + last_updated, + }) + } +} diff --git a/rust/crates/scheduler/src/dao/job_dao.rs b/rust/crates/scheduler/src/dao/job_dao.rs new file mode 100644 index 000000000..bb4e91aba --- /dev/null +++ b/rust/crates/scheduler/src/dao/job_dao.rs @@ -0,0 +1,237 @@ +use std::sync::Arc; + +use miette::{IntoDiagnostic, Result}; +use serde::{Deserialize, Serialize}; +use sqlx::{Pool, Postgres}; +use tracing::trace; +use uuid::Uuid; + +use crate::{ + cluster::Cluster, config::CONFIG, dao::helpers::parse_uuid, + metrics::observe_job_query_duration, models::DispatchJob, pgpool::connection_pool, +}; + +/// Data Access Object for job operations in the job dispatch system. +/// +/// Handles database queries related to jobs, specifically finding jobs +/// that are ready for dispatch processing based on show subscriptions, +/// resource limits, and job states. +pub struct JobDao { + connection_pool: Arc>, +} + +/// Database model representing a job ready for dispatch. 
+/// +/// Contains the essential job metadata needed for dispatch prioritization +/// and processing. This model is converted to `DispatchJob` for business +/// logic operations. +#[derive(sqlx::FromRow, Serialize, Deserialize)] +pub struct JobModel { + pub pk_job: String, + pub int_priority: i32, +} + +impl DispatchJob { + /// Creates a new DispatchJob from a database model and cluster assignment. + /// + /// # Arguments + /// + /// * `model` - Database model containing job ID and priority + /// * `cluster` - The cluster this job is assigned to for dispatch + /// + /// # Returns + /// + /// * `DispatchJob` - New dispatch job instance + pub fn new(model: JobModel, cluster: Cluster) -> Self { + DispatchJob { + id: parse_uuid(&model.pk_job), + int_priority: model.int_priority, + source_cluster: cluster, + } + } +} + +static QUERY_PENDING_BY_SHOW_FACILITY_TAG: &str = r#" +--bookable_shows: Shows that have room in at least one of its subscriptions +WITH bookable_shows AS ( + SELECT + distinct w.pk_show + FROM subscription s + INNER JOIN vs_waiting w ON s.pk_show = w.pk_show + WHERE s.pk_show = $1 + -- Burst == 0 is used to freeze a subscription + AND s.int_burst > 0 + -- At least one core unit available + AND s.int_burst - s.int_cores >= $2 + AND s.int_cores < s.int_burst +), +filtered_jobs AS ( + SELECT + j.pk_job, + jr.int_priority + FROM job j + INNER JOIN bookable_shows on j.pk_show = bookable_shows.pk_show + INNER JOIN job_resource jr ON j.pk_job = jr.pk_job + INNER JOIN folder f ON j.pk_folder = f.pk_folder + INNER JOIN folder_resource fr ON f.pk_folder = fr.pk_folder + INNER JOIN layer l ON l.pk_job = j.pk_job + WHERE j.str_state = 'PENDING' + AND j.b_paused = false + -- Check for room on folder resources + AND (fr.int_max_cores = -1 OR fr.int_cores + l.int_cores_min < fr.int_max_cores) + AND (fr.int_max_gpus = -1 OR fr.int_gpus + l.int_gpus_min < fr.int_max_gpus) + -- Match tags: jobs with at least one layer that contains the queried tag + AND 
string_to_array($3, ' | ') && string_to_array(l.str_tags, ' | ') + AND LOWER(j.pk_facility) = LOWER($4) +) +SELECT DISTINCT + fj.pk_job, + fj.int_priority +FROM filtered_jobs fj +INNER JOIN layer_stat ls ON fj.pk_job = ls.pk_job +WHERE ls.int_waiting_count > 0 +ORDER BY int_priority DESC +"#; + +static QUERY_PENDING_BY_TAGS: &str = r#" +--bookable_shows: Shows that have room in at least one of its subscriptions +WITH bookable_shows AS ( + SELECT + distinct w.pk_show + FROM subscription s + INNER JOIN vs_waiting w ON s.pk_show = w.pk_show + WHERE s.int_burst > 0 + AND s.int_burst - s.int_cores >= $1 + AND s.int_cores < s.int_burst +), +filtered_jobs AS( + SELECT + j.pk_job, + jr.int_priority + FROM job j + INNER JOIN job_resource jr ON j.pk_job = jr.pk_job + INNER JOIN folder f ON j.pk_folder = f.pk_folder + INNER JOIN folder_resource fr ON f.pk_folder = fr.pk_folder + INNER JOIN layer l ON l.pk_job = j.pk_job + INNER JOIN bookable_shows ON j.pk_show = bookable_shows.pk_show + WHERE + j.str_state = 'PENDING' + AND j.b_paused = false + AND (fr.int_max_cores = -1 OR fr.int_cores + l.int_cores_min < fr.int_max_cores) + AND (fr.int_max_gpus = -1 OR fr.int_gpus + l.int_gpus_min < fr.int_max_gpus) + AND string_to_array($2, ' | ') && string_to_array(l.str_tags, ' | ') + AND LOWER(j.pk_facility) = LOWER($3) +) +SELECT DISTINCT + fj.pk_job, + fj.int_priority +FROM filtered_jobs fj +INNER JOIN layer_stat ls ON fj.pk_job = ls.pk_job +WHERE ls.int_waiting_count > 0 +ORDER BY int_priority DESC +"#; + +impl JobDao { + /// Creates a new JobDao from database configuration. + /// + /// Establishes a connection pool to the PostgreSQL database for + /// job-related queries. 
+ /// + /// # Arguments + /// * `config` - Database configuration containing connection parameters + /// + /// # Returns + /// * `Ok(JobDao)` - Configured DAO ready for job operations + /// * `Err(miette::Error)` - If database connection fails + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + + Ok(JobDao { + connection_pool: pool, + }) + } + + /// Queries for pending jobs by show, facility, and tag criteria. + /// + /// Finds jobs that are ready for dispatch based on subscription availability, + /// resource constraints, and tag matching. The query includes several filters: + /// - Show must have active subscriptions with available burst capacity + /// - Jobs must be in PENDING state and not paused + /// - Folder resource limits must not be exceeded + /// - Layer tags must match the specified tag + /// - Jobs must have waiting layers + /// + /// # Arguments + /// * `show_id` - The unique identifier of the show to query jobs for + /// * `facility_id` - The facility identifier (currently unused in query but available for future use) + /// * `tag` - The tag string to match against layer tags (pipe-separated format supported) + /// + /// # Returns + /// A stream of `JobModel` results ordered by priority (descending). + /// Each item in the stream is a `Result`. 
/// # Example
    /// ```rust,ignore
    /// let jobs = job_dao.query_pending_jobs_by_show_facility_tag(
    ///     show_id,      // uuid::Uuid
    ///     facility_id,  // uuid::Uuid
    ///     "render | lighting".to_string(),
    /// ).await?;
    /// ```
+ /// + /// # Arguments + /// + /// * `tags` - List of tags to match against layer tags + /// + /// # Returns + /// + /// * `Ok(Vec)` - Jobs ordered by priority (descending) + /// * `Err(sqlx::Error)` - Database query failed + pub async fn query_pending_jobs_by_tags( + &self, + tags: Vec, + facility: Uuid, + ) -> Result, sqlx::Error> { + let start = std::time::Instant::now(); + let result = sqlx::query_as::<_, JobModel>(QUERY_PENDING_BY_TAGS) + .bind(CONFIG.queue.core_multiplier as i32) + .bind(tags.join(" | ").to_string()) + .bind(facility.to_string()) + .fetch_all(&*self.connection_pool) + .await; + observe_job_query_duration(start.elapsed()); + result + } +} diff --git a/rust/crates/scheduler/src/dao/layer_dao.rs b/rust/crates/scheduler/src/dao/layer_dao.rs new file mode 100644 index 000000000..c92cad09a --- /dev/null +++ b/rust/crates/scheduler/src/dao/layer_dao.rs @@ -0,0 +1,436 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use bytesize::ByteSize; +use chrono::{DateTime, Utc}; +use miette::{IntoDiagnostic, Result}; +use serde::{Deserialize, Serialize}; +use sqlx::{Pool, Postgres, Transaction}; +use tracing::debug; +use uuid::Uuid; + +use crate::{ + config::CONFIG, + dao::frame_dao::DispatchFrameModel, + dao::helpers::parse_uuid, + models::{CoreSize, DispatchLayer}, + pgpool::connection_pool, +}; + +/// Data Access Object for layer operations in the job dispatch system. +/// +/// Handles database queries related to layers within jobs, specifically +/// finding layers that have waiting frames and are ready for dispatch. +pub struct LayerDao { + connection_pool: Arc>, +} + +/// Database model representing a layer ready for dispatch. +/// +/// Contains layer metadata, resource requirements, and job context needed +/// for host matching and frame dispatch. This model is converted to +/// `DispatchLayer` for business logic processing. 
+#[derive(sqlx::FromRow, Serialize, Deserialize)] +pub struct DispatchLayerModel { + pub pk_layer: String, + pub pk_job: String, + pub pk_facility: String, + pub pk_show: String, + pub str_name: String, + pub str_job_name: String, + pub str_os: Option, + pub int_cores_min: i64, + pub int_mem_min: i64, + pub b_threadable: bool, + pub int_gpus_min: i64, + pub int_gpu_mem_min: i64, + pub str_tags: String, +} + +/// Combined model for batched layer and frame queries. +/// +/// This model contains both layer and frame data in a single row, +/// allowing us to fetch layers with their frames in one database call +/// instead of making nested queries. +#[derive(sqlx::FromRow)] +pub struct LayerWithFramesModel { + // Layer fields + pub pk_layer: String, + pub pk_job: String, + pub pk_facility: String, + pub pk_show: String, + pub layer_name: String, + pub job_name: String, + pub str_os: Option, + pub int_cores_min: i64, + pub int_mem_min: i64, + pub b_threadable: bool, + pub int_gpus_min: i64, + pub int_gpu_mem_min: i64, + pub str_tags: String, + + // Frame fields (Optional - NULL when no frames match) + pub pk_frame: Option, + pub str_frame_name: Option, + pub str_cmd: Option, + pub str_range: Option, + pub int_chunk_size: Option, + pub str_show: Option, + pub str_shot: Option, + pub str_user: Option, + pub int_uid: Option, + pub str_log_dir: Option, + pub str_layer_name: Option, + pub int_min_cores: Option, + pub int_mem_min_frame: Option, + pub int_gpus_min_frame: Option, + pub int_gpu_mem_min_frame: Option, + pub str_services: Option, + pub int_layer_cores_max: Option, + pub int_version: Option, + pub str_loki_url: Option, + pub ts_updated: Option>, +} + +impl DispatchLayer { + /// Creates a new DispatchLayer from database models. 
+ /// + /// # Arguments + /// + /// * `layer` - Layer database model + /// * `frames` - Vector of frame database models belonging to this layer + /// + /// # Returns + /// + /// * `DispatchLayer` - New layer instance with converted frames + pub fn new(layer: DispatchLayerModel, frames: Vec) -> Self { + DispatchLayer { + id: parse_uuid(&layer.pk_layer), + job_id: parse_uuid(&layer.pk_job), + facility_id: parse_uuid(&layer.pk_facility), + show_id: parse_uuid(&layer.pk_show), + job_name: layer.str_job_name, + layer_name: layer.str_name, + str_os: layer.str_os, + cores_min: CoreSize::from_multiplied( + layer + .int_cores_min + .try_into() + .expect("int_cores_min should fit on a i32"), + ), + mem_min: ByteSize::kb(layer.int_mem_min as u64), + threadable: layer.b_threadable, + gpus_min: layer + .int_gpus_min + .try_into() + .expect("gpus_min should fit on a i32"), + gpu_mem_min: ByteSize::kb(layer.int_gpu_mem_min as u64), + tags: layer.str_tags.split(" | ").map(|t| t.to_string()).collect(), + frames: frames.into_iter().map(|f| f.into()).collect(), + } + } +} + +/// Batched query that fetches layers with their frames in a single database call. +/// This eliminates the nested database calls that could cause connection pool exhaustion. 
+static QUERY_LAYERS_WITH_FRAMES: &str = r#" +WITH dispatch_frames AS ( + SELECT + f.pk_frame, + f.str_name as str_frame_name, + j.pk_show, + j.pk_facility, + j.pk_job, + l.pk_layer, + l.str_cmd, + l.str_range, + l.int_chunk_size, + j.str_show, + j.str_shot, + j.str_user, + j.int_uid, + j.str_log_dir, + j.str_loki_url, + l.str_name as str_layer_name, + j.str_name as str_job_name, + j.int_min_cores, + l.int_mem_min as int_mem_min_frame, + l.b_threadable, + l.int_gpus_min as int_gpus_min_frame, + l.int_gpu_mem_min as int_gpu_mem_min_frame, + l.str_services, + l.int_cores_max as int_layer_cores_max, + f.int_dispatch_order, + f.int_layer_order, + f.int_version, + f.ts_updated, + -- Accumulate the number of cores that would be consumed + SUM(l.int_cores_min) OVER ( + PARTITION BY l.pk_layer + ORDER BY f.int_dispatch_order, f.int_layer_order + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS aggr_job_cores, + jr.int_max_cores as job_resource_core_limit, + jr.int_cores as job_resource_consumed_cores, + -- Add row number to limit frames per layer + ROW_NUMBER() OVER ( + PARTITION BY l.pk_layer + ORDER BY f.int_dispatch_order, f.int_layer_order + ) as frame_rank + FROM job j + INNER JOIN layer l ON j.pk_job = l.pk_job + INNER JOIN frame f ON l.pk_layer = f.pk_layer + INNER JOIN job_resource jr ON l.pk_job = jr.pk_job + INNER JOIN layer_stat ls on l.pk_layer = ls.pk_layer + WHERE j.pk_job = $1 + AND ls.int_waiting_count > 0 + AND string_to_array($2, ' | ') && string_to_array(l.str_tags, ' | ') + AND f.str_state = 'WAITING' +), +limited_frames AS ( + SELECT * FROM dispatch_frames + WHERE frame_rank <= $3 -- limit frames per layer + AND (job_resource_core_limit <= 0 OR (aggr_job_cores + job_resource_consumed_cores <= job_resource_core_limit)) +) +SELECT DISTINCT + -- Layer fields + l.pk_layer, + j.pk_job, + j.pk_facility, + j.pk_show, + l.str_name as layer_name, + j.str_name as job_name, + j.str_os, + l.int_cores_min, + l.int_mem_min, + l.b_threadable, + l.int_gpus_min, 
+ l.int_gpu_mem_min, + l.str_tags, + l.int_dispatch_order, + + -- Frame fields (can be NULL if no frames) + lf.pk_frame, + lf.str_frame_name, + lf.str_cmd, + lf.str_range, + lf.int_chunk_size, + lf.str_show, + lf.str_shot, + lf.str_user, + lf.int_uid, + lf.str_log_dir, + lf.str_layer_name, + lf.int_min_cores, + lf.int_mem_min_frame, + lf.int_gpus_min_frame, + lf.int_gpu_mem_min_frame, + lf.str_services, + lf.int_layer_cores_max, + lf.int_version, + lf.int_dispatch_order, + lf.int_layer_order, + lf.str_loki_url, + lf.ts_updated +FROM job j + INNER JOIN layer l ON j.pk_job = l.pk_job + INNER JOIN layer_stat ls on l.pk_layer = ls.pk_layer + LEFT JOIN limited_frames lf ON l.pk_layer = lf.pk_layer +WHERE j.pk_job = $1 + AND ls.int_waiting_count > 0 + AND string_to_array($2, ' | ') && string_to_array(l.str_tags, ' | ') +ORDER BY + l.int_dispatch_order, + lf.int_dispatch_order, + lf.int_layer_order +"#; + +impl LayerDao { + /// Creates a new LayerDao from database configuration. + /// + /// Establishes a connection pool to the PostgreSQL database for + /// layer-related queries. + /// + /// # Arguments + /// * `config` - Database configuration containing connection parameters + /// + /// # Returns + /// * `Ok(LayerDao)` - Configured DAO ready for layer operations + /// * `Err(miette::Error)` - If database connection fails + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + Ok(LayerDao { + connection_pool: pool, + }) + } + + /// Fetches layers with their frames in a single batched database query. + /// + /// Uses a single SQL query with joins to fetch both layers and their frames, + /// eliminating nested queries that could exhaust the connection pool. Respects + /// the configured frame limit per layer. 
+ /// + /// # Arguments + /// + /// * `pk_job` - UUID of the job to query layers for + /// * `tags` - Vector of tags to match against layer tags + /// + /// # Returns + /// + /// * `Ok(Vec)` - Layers with their frames, ordered by dispatch priority + /// * `Err(sqlx::Error)` - Database query failed + pub async fn query_layers( + &self, + pk_job: Uuid, + tags: Vec, + ) -> Result, sqlx::Error> { + let combined_models = sqlx::query_as::<_, LayerWithFramesModel>(QUERY_LAYERS_WITH_FRAMES) + .bind(pk_job.to_string()) + .bind(tags.join(" | ").to_string()) + .bind(CONFIG.queue.dispatch_frames_per_layer_limit as i32) + .fetch_all(&*self.connection_pool) + .await?; + debug!("Got {} frames", combined_models.len()); + + Ok(self.group_layers_and_frames(combined_models)) + } + + /// Groups flat query results into layers with their associated frames. + /// + /// Transforms the denormalized query results into a structured hierarchy + /// of layers containing their respective frames. + /// + /// # Arguments + /// + /// * `models` - Flat list of combined layer+frame records from database + /// + /// # Returns + /// + /// * `Vec` - Structured layers with grouped frames + fn group_layers_and_frames(&self, models: Vec) -> Vec { + let mut layers_map: HashMap)> = + HashMap::new(); + + for model in models { + // Extract layer data + let layer_model = DispatchLayerModel { + pk_layer: model.pk_layer.clone(), + pk_job: model.pk_job.clone(), + pk_facility: model.pk_facility.clone(), + pk_show: model.pk_show.clone(), + str_name: model.layer_name.clone(), + str_job_name: model.job_name.clone(), + str_os: model.str_os.clone(), + int_cores_min: model.int_cores_min, + int_mem_min: model.int_mem_min, + b_threadable: model.b_threadable, + int_gpus_min: model.int_gpus_min, + int_gpu_mem_min: model.int_gpu_mem_min, + str_tags: model.str_tags.clone(), + }; + + // Extract frame data (if present) + let frame_model = if let Some(pk_frame) = model.pk_frame { + Some(DispatchFrameModel { + pk_frame, + 
str_frame_name: model.str_frame_name.unwrap_or_default(), + pk_show: model.pk_show.clone(), + pk_facility: model.pk_facility.clone(), + pk_job: model.pk_job.clone(), + pk_layer: model.pk_layer.clone(), + str_cmd: model.str_cmd.unwrap_or_default(), + str_range: model.str_range.unwrap_or_default(), + int_chunk_size: model.int_chunk_size.unwrap_or(1), + str_show: model.str_show.unwrap_or_default(), + str_shot: model.str_shot.unwrap_or_default(), + str_user: model.str_user.unwrap_or_default(), + int_uid: model.int_uid, + str_log_dir: model.str_log_dir.unwrap_or_default(), + str_layer_name: model.str_layer_name.unwrap_or_default(), + str_job_name: model.job_name.clone(), + int_min_cores: model.int_min_cores.unwrap_or(100), // default core multiplier + int_mem_min: model.int_mem_min_frame.unwrap_or(0), + b_threadable: model.b_threadable, + int_gpus_min: model.int_gpus_min_frame.unwrap_or(0), + int_gpu_mem_min: model.int_gpu_mem_min_frame.unwrap_or(0), + str_services: model.str_services, + str_os: model.str_os.clone(), + int_layer_cores_max: model.int_layer_cores_max.unwrap_or(0), + int_version: model.int_version.unwrap_or(1), + str_loki_url: model.str_loki_url, + ts_updated: model.ts_updated, + }) + } else { + None + }; + + // Group by layer_id + let layer_entry = layers_map + .entry(model.pk_layer.clone()) + .or_insert((layer_model, vec![])); + + if let Some(frame) = frame_model { + layer_entry.1.push(frame); + } + } + + // Convert to DispatchLayer objects + layers_map + .into_values() + .map(|(layer_model, frame_models)| DispatchLayer::new(layer_model, frame_models)) + .collect() + } + + /// Checks if a layer has available capacity under its configured limits. + /// + /// Verifies that the sum of running frames across all layers sharing the same + /// limit record is below the maximum allowed value. Returns false if the layer + /// is at its limit, preventing further frame dispatch. 
+ /// + /// # Arguments + /// + /// * `transaction` - Active database transaction + /// * `layer` - Layer to check limits for + /// + /// # Returns + /// + /// * `Ok(true)` - Layer has capacity available or no limits configured + /// * `Ok(false)` - Layer has reached its limit + /// * `Err(sqlx::Error)` - Database query failed + pub async fn check_limits( + &self, + transaction: &mut Transaction<'_, Postgres>, + layer: &DispatchLayer, + ) -> Result { + let res = sqlx::query( + r#" + SELECT layer.pk_layer + FROM layer + LEFT JOIN layer_limit ON layer_limit.pk_layer = layer.pk_layer + LEFT JOIN limit_record ON limit_record.pk_limit_record = layer_limit.pk_limit_record + LEFT JOIN ( + SELECT limit_record.pk_limit_record, + SUM(layer_stat.int_running_count) AS int_sum_running + FROM layer_limit + LEFT JOIN limit_record ON layer_limit.pk_limit_record = limit_record.pk_limit_record + LEFT JOIN layer_stat ON layer_stat.pk_layer = layer_limit.pk_layer + GROUP BY limit_record.pk_limit_record + ) AS sum_running ON limit_record.pk_limit_record = sum_running.pk_limit_record + WHERE layer.pk_layer = $1 + AND sum_running.int_sum_running < limit_record.int_max_value + OR sum_running.int_sum_running IS NULL + "#, + ) + .bind(layer.id.to_string()) + .fetch_one(&mut **transaction) + .await; + // Only return false if the query returns no row, which means the layer queried is at limit + match res { + Ok(_) => Ok(true), + Err(err) => match err { + sqlx::Error::RowNotFound => Ok(false), + _ => Err(err), + }, + } + } +} diff --git a/rust/crates/scheduler/src/dao/mod.rs b/rust/crates/scheduler/src/dao/mod.rs new file mode 100644 index 000000000..509a2d258 --- /dev/null +++ b/rust/crates/scheduler/src/dao/mod.rs @@ -0,0 +1,20 @@ +mod allocation_dao; +mod cluster_dao; +mod frame_dao; +pub mod helpers; +mod host_dao; +mod job_dao; +mod layer_dao; +mod proc_dao; + +pub use allocation_dao::AllocationDao; +pub use cluster_dao::ClusterDao; +pub use frame_dao::FrameDao; +pub use 
host_dao::HostDao; +pub use job_dao::JobDao; +pub use layer_dao::LayerDao; +pub use proc_dao::ProcDao; + +pub use allocation_dao::{AllocationName, ShowId}; +pub use frame_dao::FrameDaoError; +pub use host_dao::UpdatedHostResources; diff --git a/rust/crates/scheduler/src/dao/proc_dao.rs b/rust/crates/scheduler/src/dao/proc_dao.rs new file mode 100644 index 000000000..f592e9b96 --- /dev/null +++ b/rust/crates/scheduler/src/dao/proc_dao.rs @@ -0,0 +1,168 @@ +use futures::TryFutureExt; +use miette::{IntoDiagnostic, Result}; +use sqlx::{Pool, Postgres, Transaction}; +use std::sync::Arc; + +use crate::{config::CONFIG, models::VirtualProc, pgpool::connection_pool}; + +/// Data Access Object for proc (virtual processor) operations in the job dispatch system. +/// +/// A proc represents the allocation of compute resources (CPU cores, memory, GPUs) from a host +/// to execute a specific frame. This DAO manages the lifecycle of proc records in the database, +/// tracking which resources are reserved for which frames. +/// +/// # Purpose +/// +/// The `ProcDao` is responsible for: +/// - Creating new proc records when frames are dispatched to hosts +/// - Recording resource reservations (cores, memory, GPUs) for frame execution +/// - Maintaining the relationship between hosts, jobs, layers, frames, and allocated resources +/// +/// # Database Schema +/// +/// The proc table tracks: +/// - Resource identifiers (host, show, layer, job, frame) +/// - Reserved compute resources (cores, memory, GPUs) +/// - Pre-reserved and used memory tracking +/// - Local vs. remote dispatch flag +pub struct ProcDao { + /// Shared connection pool for database operations. + #[allow(dead_code)] + connection_pool: Arc>, +} + +/// SQL query to insert a new proc record into the database. +/// +/// Creates a proc entry that tracks resource allocation for a frame execution. 
+/// The query inserts all resource reservation details including CPU cores, memory, +/// GPUs, and the relationships to host, show, layer, job, and frame. +/// +/// # Parameters (Positional) +/// +/// 1. `pk_proc` - Unique proc identifier (UUID) +/// 2. `pk_host` - Host ID where the proc is allocated +/// 3. `pk_show` - Show ID that owns this proc +/// 4. `pk_layer` - Layer ID within the job +/// 5. `pk_job` - Job ID that this proc belongs to +/// 6. `pk_frame` - Frame ID being executed by this proc +/// 7. `int_cores_reserved` - Number of CPU cores reserved (as hundredths) +/// 8. `int_mem_reserved` - Amount of memory reserved (bytes) +/// 9. `int_mem_pre_reserved` - Pre-reserved memory amount (bytes) +/// 10. `int_mem_used` - Initial memory usage (bytes) +/// 11. `int_gpus_reserved` - Number of GPUs reserved +/// 12. `int_gpu_mem_reserved` - GPU memory reserved (bytes) +/// 13. `int_gpu_mem_pre_reserved` - Pre-reserved GPU memory (bytes) +/// 14. `int_gpu_mem_used` - Initial GPU memory usage (bytes) +/// 15. `b_local` - Whether this is a local dispatch (boolean) +static INSERT_PROC: &str = r#" + INSERT INTO proc ( + pk_proc, + pk_host, + pk_show, + pk_layer, + pk_job, + pk_frame, + int_cores_reserved, + int_mem_reserved, + int_mem_pre_reserved, + int_mem_used, + int_gpus_reserved, + int_gpu_mem_reserved, + int_gpu_mem_pre_reserved, + int_gpu_mem_used, + b_local + ) VALUES ( + $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15 + ) +"#; + +impl ProcDao { + /// Creates a new `ProcDao` instance with a connection pool. + /// + /// This constructor initializes the DAO by obtaining a shared database connection pool. + /// The connection pool is reused across all operations for efficiency. + /// + /// # Returns + /// + /// Returns `Ok(ProcDao)` on success, or an error if the connection pool cannot be obtained. 
+ /// + /// # Errors + /// + /// This function will return an error if: + /// - The database connection pool cannot be created + /// - Database connection parameters are invalid + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + Ok(ProcDao { + connection_pool: pool, + }) + } + + /// Inserts a new proc record into the database within an existing transaction. + /// + /// Creates a database record representing the allocation of compute resources from a host + /// to execute a specific frame. This operation must be part of a larger transaction to + /// ensure atomicity with other dispatch operations (e.g., updating host resources, + /// creating frame assignments). + /// + /// # Arguments + /// + /// * `transaction` - Mutable reference to an active database transaction + /// * `virtual_proc` - The virtual processor model containing all resource allocation details + /// + /// # Resource Allocation Details + /// + /// The function records: + /// - **CPU Cores**: Reserved cores from the host (stored as hundredths of a core) + /// - **Memory**: Both reserved and pre-reserved memory amounts (initialized to the same value) + /// - **GPUs**: Number of GPUs reserved and their memory allocation + /// - **Initial Usage**: Memory used is set to the minimum reserved amount from config + /// - **Dispatch Type**: Whether this is a local or remote dispatch + /// + /// - Initial GPU memory used is set to 0 + /// + /// # Returns + /// + /// Returns `Ok(())` on successful insertion. 
+ /// + /// # Errors + /// + /// This function will return an error if: + /// - The SQL query execution fails (e.g., constraint violations, connection issues) + /// - Foreign key constraints are violated (invalid host, show, layer, job, or frame IDs) + /// - The transaction is no longer active + pub async fn insert( + &self, + transaction: &mut Transaction<'_, Postgres>, + virtual_proc: &VirtualProc, + ) -> Result<(), (sqlx::Error, String, String)> { + sqlx::query(INSERT_PROC) + .bind(virtual_proc.proc_id.to_string()) + .bind(virtual_proc.host_id.to_string()) + .bind(virtual_proc.show_id.to_string()) + .bind(virtual_proc.layer_id.to_string()) + .bind(virtual_proc.job_id.to_string()) + .bind(virtual_proc.frame_id.to_string()) + .bind(virtual_proc.cores_reserved.value()) + // Memory is represented as KB on the database + .bind(virtual_proc.memory_reserved.0 as i64 / 1024) + .bind(virtual_proc.memory_reserved.0 as i64 / 1024) + .bind(CONFIG.queue.mem_reserved_min.0 as i64 / 1024) + .bind(virtual_proc.gpus_reserved as i32) + .bind(virtual_proc.gpu_memory_reserved.0 as i64 / 1024) + .bind(virtual_proc.gpu_memory_reserved.0 as i64 / 1024) + .bind(0) + .bind(virtual_proc.is_local_dispatch) + .execute(&mut **transaction) + .map_err(|err| { + ( + err, + virtual_proc.frame_id.to_string(), + virtual_proc.host_id.to_string(), + ) + }) + .await?; + + Ok(()) + } +} diff --git a/rust/crates/scheduler/src/host_cache/actor.rs b/rust/crates/scheduler/src/host_cache/actor.rs new file mode 100644 index 000000000..d8e273607 --- /dev/null +++ b/rust/crates/scheduler/src/host_cache/actor.rs @@ -0,0 +1,481 @@ +use actix::{Actor, ActorFutureExt, AsyncContext, Handler, ResponseActFuture, WrapFuture}; + +use bytesize::ByteSize; +use itertools::Itertools; +use miette::IntoDiagnostic; +use scc::{hash_map::OccupiedEntry, HashMap, HashSet}; +use std::{ + cmp::Ordering, + sync::{ + atomic::{self, AtomicU64}, + Arc, + }, + time::{Duration, SystemTime}, +}; + +use futures::{stream, StreamExt}; +use 
miette::Result; +use tokio::sync::Semaphore; +use tracing::{debug, error, info, trace}; + +use crate::{ + cluster_key::{ClusterKey, Tag, TagType}, + config::CONFIG, + dao::HostDao, + host_cache::*, + host_cache::{messages::*, store}, + models::{CoreSize, Host}, +}; + +#[derive(Clone)] +pub struct HostCacheService { + host_dao: Arc, + cluster_index: Arc>, + reserved_hosts: Arc>, + cache_hit: Arc, + cache_miss: Arc, + concurrency_semaphore: Arc, +} + +/// Use a reservation system to prevent race conditions when trying to book a host +/// that belongs to multiple groups. +struct HostReservation { + reserved_time: SystemTime, +} + +impl HostReservation { + pub fn new() -> Self { + HostReservation { + reserved_time: SystemTime::now(), + } + } + + pub fn expired(&self) -> bool { + self.reserved_time.elapsed().unwrap_or_default() > Duration::from_secs(10) + } +} + +impl Actor for HostCacheService { + type Context = actix::Context; + + fn started(&mut self, ctx: &mut Self::Context) { + let service_for_monitor = self.clone(); + let service_for_clean_up = self.clone(); + + ctx.run_interval(CONFIG.host_cache.monitoring_interval, move |_act, ctx| { + let service = service_for_monitor.clone(); + let actor_clone = service.clone(); + ctx.spawn(async move { service.refresh_cache().await }.into_actor(&actor_clone)); + }); + + ctx.run_interval(CONFIG.host_cache.clean_up_interval, move |_act, _ctx| { + let service = service_for_clean_up.clone(); + + // Clean up stale hosts from the host store + service.cleanup_stale_hosts(); + }); + + info!("HostCacheService actor started"); + } + + fn stopped(&mut self, _ctx: &mut Self::Context) { + info!("HostCacheService actor stopped"); + } +} + +impl Handler> for HostCacheService +where + F: Fn(&Host) -> bool + 'static, +{ + type Result = ResponseActFuture>; + + fn handle(&mut self, msg: CheckOut, _ctx: &mut Self::Context) -> Self::Result { + let CheckOut { + facility_id, + show_id, + tags, + cores, + memory, + validation, + } = msg; + + let 
service = self.clone(); + + Box::pin( + async move { + let out = service + .check_out(facility_id, show_id, tags, cores, memory, validation) + .await; + if let Ok(host) = &out { + debug!("Checked out {}", host.1); + } + out + } + .into_actor(self) + .map(|result, _, _| result), + ) + } +} + +impl Handler for HostCacheService { + type Result = (); + + fn handle(&mut self, msg: CheckIn, _ctx: &mut Self::Context) -> Self::Result { + let CheckIn(cluster_key, payload) = msg; + match payload { + CheckInPayload::Host(host) => { + let host_str = format!("{}", host); + self.check_in(cluster_key, host); + + debug!("Checked in {}", &host_str); + } + CheckInPayload::Invalidate(host_id) => { + let _ = self.reserved_hosts.remove_sync(&host_id); + + debug!("Checked in {} (invalid)", &host_id); + } + } + } +} + +impl Handler for HostCacheService { + type Result = CacheRatioResponse; + + fn handle(&mut self, _msg: CacheRatio, _ctx: &mut Self::Context) -> Self::Result { + CacheRatioResponse { + hit: self.cache_hit.load(atomic::Ordering::Relaxed), + miss: self.cache_miss.load(atomic::Ordering::Relaxed), + hit_ratio: self.cache_hit_ratio(), + } + } +} + +impl HostCacheService { + /// Creates a new HostCacheService with empty cache groups. + /// + /// Initializes the service with DAO access, cache tracking metrics, + /// and concurrency controls. + /// + /// # Returns + /// + /// * `Ok(HostCacheService)` - New service instance + /// * `Err(miette::Error)` - Failed to initialize dependencies + pub(in crate::host_cache) async fn new() -> Result { + Ok(HostCacheService { + host_dao: Arc::new(HostDao::new().await?), + cluster_index: Arc::new(HashMap::new()), + cache_hit: Arc::new(AtomicU64::new(0)), + cache_miss: Arc::new(AtomicU64::new(0)), + concurrency_semaphore: Arc::new(Semaphore::new( + CONFIG.host_cache.concurrent_fetch_permit, + )), + reserved_hosts: Arc::new(HashMap::new()), + }) + } + + /// Checks out a host from the cache that matches the requirements. 
+ /// + /// Searches through cache groups for each tag until a suitable host is found. + /// If not found in cache, fetches from database. Implements host reservation + /// to prevent race conditions. + /// + /// # Arguments + /// + /// * `facility_id` - Facility identifier + /// * `show_id` - Show identifier + /// * `tags` - List of tags to search (tried in priority order) + /// * `cores` - Minimum cores required + /// * `memory` - Minimum memory required + /// * `validation` - Additional validation function + /// + /// # Returns + /// + /// * `Ok(CheckedOutHost)` - Host with cluster key + /// * `Err(HostCacheError)` - No suitable host found or database error + async fn check_out( + &self, + facility_id: Uuid, + show_id: Uuid, + tags: Vec, + cores: CoreSize, + memory: ByteSize, + validation: F, + ) -> Result + where + F: Fn(&Host) -> bool, + { + let cache_keys = self.gen_cache_keys(facility_id, show_id, tags); + + // Extend validation to also check for hosts that are already reserved + let validation = |host: &Host| { + let available = self + .reserved_hosts + .read_sync(&host.id, |_, reservation| reservation.expired()) + .unwrap_or(true); + validation(host) && available + }; + + for cache_key in cache_keys { + // Attempt to read from the cache + let cached_candidate = self + .cluster_index + // Using the async counterpart here to prevent blocking during checkout. + // As the number of groups is not very large, consumers are eventually going to + // fight for the same rows. 
+ .read_async(&cache_key, |_, cached_group| { + if !cached_group.expired() { + cached_group + // Checkout host from a group + .check_out(cores, memory, validation) + .map(|host| (cache_key.clone(), host.clone())) + .ok() + } else { + None + } + }) + .await + .flatten(); + + // Fetch form the database if not found on cache + match cached_candidate { + Some(cached) => { + self.reserve_host(cached.1.id, true); + return Ok(CheckedOutHost(cached.0, cached.1)); + } + None => { + let group = self + .fetch_group_data(&cache_key) + .await + .map_err(|err| HostCacheError::FailedToQueryHostCache(err.to_string()))?; + let checked_out_host = group + // Checkout host from a group + .check_out(cores, memory, validation) + .map(|host| CheckedOutHost(cache_key.clone(), host.clone())); + + if let Ok(checked_out_host) = checked_out_host { + self.reserve_host(checked_out_host.1.id, false); + // Only count as a cache miss if there was a host candidate available + return Ok(checked_out_host); + } else { + debug!( + "Wasn't able to find suitable hosts for group {:?}", + &cache_key + ); + } + } + } + } + Err(HostCacheError::NoCandidateAvailable) + } + + /// Reserves a host to prevent concurrent checkout and tracks cache metrics. + /// + /// Marks the host as reserved and updates hit/miss counters for cache + /// performance tracking. + /// + /// # Arguments + /// + /// * `host_id` - ID of the host to reserve + /// * `cache_hit` - Whether this was a cache hit (true) or miss (false) + fn reserve_host(&self, host_id: HostId, cache_hit: bool) { + if cache_hit { + self.cache_hit.fetch_add(1, atomic::Ordering::Relaxed); + } else { + self.cache_miss.fetch_add(1, atomic::Ordering::Relaxed); + } + // Mark host as reserved + let _ = self + .reserved_hosts + .insert_sync(host_id, HostReservation::new()); + } + + /// Returns a host to the cache group after use. + /// + /// Removes the host from reservation and adds it back to the appropriate + /// cache group. 
If the group has expired, the host is dropped. + /// + /// # Arguments + /// + /// * `cluster_key` - The cluster key identifying the cache group + /// * `host` - Host to return to the cache + fn check_in(&self, cluster_key: ClusterKey, host: Host) { + trace!( + "{}: Attempting to checkin ({}, {})", + cluster_key, + host.id, + host.idle_cores + ); + let host_id = host.id; + + match self.cluster_index.get_sync(&cluster_key) { + Some(group) => { + group.check_in(host, false); + } + None => { + info!( + "{} checking in on unexisting group ({}).", + host.id, cluster_key + ); + // Noop. The group might have expired and will be updated on demand + } + } + let _ = self.reserved_hosts.remove_sync(&host_id); + + trace!("{}: Done checkin", cluster_key); + } + + /// Calculates the cache hit ratio as a percentage. + /// + /// # Returns + /// + /// * `usize` - Hit ratio percentage (0-100) + fn cache_hit_ratio(&self) -> usize { + let hit = self.cache_hit.load(atomic::Ordering::Relaxed) as f64; + let miss = self.cache_miss.load(atomic::Ordering::Relaxed) as f64; + + ((hit / (hit + miss)) * 100.0) as usize + } + + /// Generates cache keys from tags in priority order. + /// + /// Creates ClusterKey instances for each tag and sorts them by priority: + /// MANUAL > HOSTNAME > ALLOC. This order ensures more specific tags are + /// checked first. 
+ /// + /// # Arguments + /// + /// * `facility_id` - Facility identifier + /// * `show_id` - Show identifier + /// * `tags` - Tags to convert to cache keys + /// + /// # Returns + /// + /// * `impl IntoIterator` - Sorted cache keys + #[allow(clippy::map_entry)] + fn gen_cache_keys( + &self, + facility_id: Uuid, + show_id: Uuid, + tags: Vec, + ) -> impl IntoIterator { + tags.into_iter() + .map(|tag| ClusterKey { + facility_id, + show_id, + tag, + }) + // Make sure tags are evaluated in this order: MANUAL -> HOSTNAME -> ALLOC + .sorted_by(|l, r| match (&l.tag.ttype, &r.tag.ttype) { + (TagType::Alloc, TagType::Alloc) + | (TagType::HostName, TagType::HostName) + | (TagType::Manual, TagType::Manual) => Ordering::Equal, + (TagType::Manual, _) => Ordering::Less, + (TagType::HostName, _) => Ordering::Less, + (TagType::Alloc, _) => Ordering::Greater, + }) + } + + /// Periodically refreshes cache data and removes idle groups. + /// + /// Runs on a timer to update active cache groups from the database and + /// remove groups that haven't been queried recently. + async fn refresh_cache(&self) { + let caches = Arc::new(&self.cluster_index); + + // Clone list of groups keys to avoid keeping a lock through the stream lifetime + let mut cloned_keys = Vec::new(); + self.cluster_index.iter_sync(|key, value| { + cloned_keys.push((key.clone(), value.is_idle())); + true + }); + + let groups_for_removal = HashSet::new(); + + stream::iter(cloned_keys) + .map(|(group_key, is_idle)| { + let groups_for_removal = groups_for_removal.clone(); + + async move { + // Skip groups if it exists on the cache but haven't been queried for a while + if is_idle { + if let Err(err) = groups_for_removal.insert_async(group_key).await { + error!("Failed to mark group for removal on host_cache. 
{}", err); + } + } else if let Err(err) = self.fetch_group_data(&group_key).await { + error!( + "Failed to fetch cache data on cache loop for key {}.{}", + group_key, err + ); + } + } + }) + .buffer_unordered(CONFIG.host_cache.concurrent_groups) + .collect::>() + .await; + + // Clean up caches that haven't been queried for a while + groups_for_removal.iter_sync(|key| { + caches.remove_sync(key); + true + }); + } + + /// Removes stale hosts from the global host store. + /// + /// This method triggers cleanup of hosts that haven't been updated within + /// the configured `host_staleness_threshold` duration. It logs the number + /// of hosts removed for monitoring purposes. + fn cleanup_stale_hosts(&self) { + let removed_count = store::HOST_STORE.cleanup_stale_hosts(); + if removed_count > 0 { + info!("Cleaned up {} stale hosts from store", removed_count); + } + } + + /// Fetches host data from the database and populates a cache group. + /// + /// Queries the database for hosts matching the cluster key and adds them + /// to the cache. Uses a semaphore to limit concurrent database queries. 
+ /// + /// # Arguments + /// + /// * `key` - Cluster key identifying which hosts to fetch + /// + /// # Returns + /// + /// * `Ok(OccupiedEntry)` - Cache entry with fetched hosts + /// * `Err(miette::Error)` - Database query failed + async fn fetch_group_data( + &self, + key: &ClusterKey, + ) -> Result> { + let _permit = self + .concurrency_semaphore + .acquire() + .await + .into_diagnostic()?; + + let tag = key.tag.to_string(); + let hosts = self + .host_dao + .fetch_hosts_by_show_facility_tag(key.show_id, key.facility_id, &tag) + .await + .into_diagnostic()?; + + let cache = self.cluster_index.entry_sync(key.clone()).or_default(); + + if hosts.is_empty() { + debug!( + "Found no suitable hosts on the database for the cluster key {:?}", + key + ); + } + + for host in hosts { + let h: Host = host.into(); + cache.check_in(h, false); + } + cache.ping_fetch(); + Ok(cache) + } +} diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs new file mode 100644 index 000000000..e090f7714 --- /dev/null +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -0,0 +1,622 @@ +/// A cache of hosts organized in B-trees to speed up searching and traversing hosts in order. +/// +/// Host are queried by their number of available cores and available memory. To speed up this +/// search, they are stored in groups organized as the example bellow: +/// +/// * 2-cores: +/// - <= 2GB +/// - > 2GB <= 4GB +/// - > 4GB <= 6GB +/// - > 6GB <= 8GB +/// * 4-cores: +/// - <= 2GB +/// - > 2GB <= 4GB +/// - > 4GB <= 6GB +/// - > 6GB <= 8GB +/// * 5-cores: +/// - <= 2GB +/// - > 2GB <= 4GB +/// - > 4GB <= 6GB +/// - > 6GB <= 8GB +/// +/// ... 
+use std::{ + collections::{BTreeMap, HashSet}, + sync::RwLock, + time::{Duration, SystemTime}, +}; + +use bytesize::ByteSize; +use miette::Result; +use uuid::Uuid; + +use crate::{ + config::{HostBookingStrategy, CONFIG}, + host_cache::{store::HOST_STORE, HostCacheError, HostId}, + models::{CoreSize, Host}, +}; + +type CoreKey = u32; +type MemoryKey = u64; + +/// A B-Tree of Hosts ordered by memory +pub type MemoryBTree = BTreeMap>; + +pub struct HostCache { + /// B-Tree of host groups ordered by their number of available cores + hosts_index: RwLock>, + /// If a cache stops being queried for a certain amount of time, stop keeping it up to date + last_queried: RwLock, + /// Marks if the data on this cache have expired + last_fetched: RwLock>, + strategy: HostBookingStrategy, +} + +impl Default for HostCache { + fn default() -> Self { + HostCache { + hosts_index: RwLock::new(BTreeMap::new()), + last_queried: RwLock::new(SystemTime::now()), + last_fetched: RwLock::new(None), + strategy: CONFIG.queue.host_booking_strategy, + } + } +} + +impl HostCache { + /// Updates the last queried timestamp to prevent cache expiration. + /// + /// Called whenever the cache is accessed to track activity and prevent + /// idle timeout expiration. + fn ping_query(&self) { + let mut lock = self + .last_queried + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + *lock = SystemTime::now(); + } + + /// Updates the last fetched timestamp to mark cache data as fresh. + /// + /// Called after fetching new data from the database to mark when the + /// cache was last refreshed. + pub fn ping_fetch(&self) { + let mut lock = self + .last_fetched + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + *lock = Some(SystemTime::now()); + } + + /// Checks if the cache data has expired and needs refreshing. + /// + /// Cache expires after the configured group_idle_timeout period has elapsed + /// since the last fetch. 
+ /// + /// # Returns + /// + /// * `bool` - True if cache data has expired + pub fn expired(&self) -> bool { + let lock = self + .last_fetched + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + matches!(*lock, + Some(last_fetched) + if last_fetched.elapsed().unwrap_or(Duration::from_secs(1)) + > CONFIG.host_cache.group_idle_timeout) + } + + /// Checks if the cache has been idle for too long without queries. + /// + /// Used to determine if a cache group should be removed to save memory. + /// + /// # Returns + /// + /// * `bool` - True if cache hasn't been queried within the idle timeout period + pub fn is_idle(&self) -> bool { + self.last_queried + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .elapsed() + .unwrap_or(Duration::from_secs(1)) + > CONFIG.host_cache.group_idle_timeout + } + + /// Checks out the best matching host from the cache. + /// + /// Finds a host with sufficient resources that passes the validation function, + /// removes it from the cache, and returns it. The host must be checked back in + /// after use. + /// + /// # Arguments + /// + /// * `cores` - Minimum number of cores required + /// * `memory` - Minimum memory required + /// * `validation` - Function to validate additional host requirements + /// + /// # Returns + /// + /// * `Ok(Host)` - Successfully checked out host + /// * `Err(HostCacheError)` - No suitable host available + pub fn check_out( + &self, + cores: CoreSize, + memory: ByteSize, + validation: F, + ) -> Result + where + F: Fn(&Host) -> bool, + { + self.ping_query(); + + let host = self + .remove_host(cores, memory, validation) + .ok_or(HostCacheError::NoCandidateAvailable)?; + + Ok(host) + } + + /// Removes a suitable host from the cache based on resource requirements. + /// + /// Searches for a host with at least the requested cores and memory that + /// passes the validation function. 
Uses atomic operations with retry logic + /// to prevent race conditions where host state changes between lookup and removal. + /// + /// # Arguments + /// + /// * `cores` - Minimum number of cores required + /// * `memory` - Minimum memory required + /// * `validation` - Function to validate additional requirements + /// + /// # Returns + /// + /// * `Some(Host)` - Host that meets all requirements + /// * `None` - No suitable host found + fn remove_host(&self, cores: CoreSize, memory: ByteSize, validation: F) -> Option + where + F: Fn(&Host) -> bool, + { + let core_key = cores.value() as u32; + let memory_key = Self::gen_memory_key(memory); + let host_validation = |host: &Host| { + validation(host) && host.idle_memory >= memory && host.idle_cores >= cores + }; + + // Step 1: Find a candidate host in the index + let candidate_info = { + let host_index_lock = self.hosts_index.read().unwrap_or_else(|p| p.into_inner()); + let mut iter: Box> = + if !self.strategy.core_saturation { + // Reverse order to find hosts with max amount of cores available + Box::new(host_index_lock.range(core_key..).rev()) + } else { + Box::new(host_index_lock.range(core_key..)) + }; + + iter.find_map(|(by_core_key, hosts_by_memory)| { + let find_fn = |(by_memory_key, hosts): (&u64, &HashSet)| { + hosts.iter().find_map(|host_id| { + HOST_STORE.get(host_id).and_then(|host| { + // Check validation and memory capacity + if host_validation(&host) { + Some((*by_core_key, *by_memory_key, *host_id, host.last_updated)) + } else { + None + } + }) + }) + }; + + if self.strategy.memory_saturation { + // Search for hosts with at least the same amount of memory requested + hosts_by_memory.range(memory_key..).find_map(find_fn) + } else { + // Search for hosts with the most amount of memory available + hosts_by_memory.range(memory_key..).rev().find_map(find_fn) + } + }) + }; + + // Step 2: Attempt atomic removal if we found a candidate + if let Some((by_core_key, by_memory_key, host_id, 
expected_last_updated)) = candidate_info { + // Atomic check-and-remove from HOST_STORE + // Ensure host is still valid when it's time to remove it + match HOST_STORE.atomic_remove_if_valid( + &host_id, + expected_last_updated, + host_validation, + ) { + Ok(Some(removed_host)) => { + // Successfully removed from store, now remove from index + let mut host_index_lock = + self.hosts_index.write().unwrap_or_else(|p| p.into_inner()); + + // Remove from hosts_by_core_and_memory index + host_index_lock + .get_mut(&by_core_key) + .and_then(|hosts_by_memory| hosts_by_memory.get_mut(&by_memory_key)) + .map(|hosts| hosts.remove(&host_id)); + + return Some(removed_host); + } + Ok(None) => { + // Host was removed by another thread, try again + return None; + } + Err(()) => { + // Host state changed, retry the entire operation + return None; + } + } + } + + None + } + + /// Returns a host to the cache after use. + /// + /// Updates the cache with the host's current resource state. If the host + /// already exists in the cache, it's updated with the new values. The host + /// is indexed by its current idle cores and memory for efficient lookup. + /// + /// This method now performs all updates atomically under a single write lock, + /// preventing race conditions with concurrent check_out operations. 
+ /// + /// # Arguments + /// + /// * `host` - Host to return to the cache + pub fn check_in(&self, host: Host, authoritative: bool) { + let host_id = host.id; + + // Update the data_store with new version + let last_host_version = HOST_STORE.insert(host, authoritative); + + let core_key = last_host_version.idle_cores.value() as CoreKey; + let memory_key = Self::gen_memory_key(last_host_version.idle_memory); + + let mut host_index = self + .hosts_index + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + + // Insert at the new location + host_index + .entry(core_key) + .or_default() + .entry(memory_key) + .or_default() + .insert(host_id); + } + + /// Generates a memory key for cache indexing by bucketing memory values. + /// + /// Divides memory by the configured divisor to group hosts with similar + /// memory into the same bucket, reducing cache fragmentation. + /// + /// # Arguments + /// + /// * `memory` - Memory amount to convert to a key + /// + /// # Returns + /// + /// * `MemoryKey` - Bucketed memory key for indexing + fn gen_memory_key(memory: ByteSize) -> MemoryKey { + memory.as_u64() / CONFIG.host_cache.memory_key_divisor.as_u64() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use opencue_proto::host::ThreadMode; + use std::thread; + use std::time::Duration; + use tokio_test::{assert_err, assert_ok}; + use uuid::Uuid; + + fn create_test_host(id: Uuid, idle_cores: i32, idle_memory: ByteSize) -> Host { + Host { + id, + name: format!("test-host-{}", id), + str_os: Some("Linux".to_string()), + total_cores: CoreSize(idle_cores), + total_memory: idle_memory, + idle_cores: CoreSize(idle_cores), + idle_memory, + idle_gpus: 0, + idle_gpu_memory: ByteSize::kb(0), + thread_mode: ThreadMode::Auto, + alloc_available_cores: CoreSize(idle_cores), + alloc_id: Uuid::new_v4(), + alloc_name: "test".to_string(), + last_updated: Utc::now(), + } + } + + #[test] + fn test_new_host_cache() { + let cache = HostCache::default(); + let 
hosts_index = cache.hosts_index.read().unwrap(); + assert!(hosts_index.is_empty()); + drop(hosts_index); + assert!(!cache.expired()); + } + + #[test] + fn test_ping_query_updates_last_queried() { + let cache = HostCache::default(); + let initial_time = *cache.last_queried.read().unwrap(); + + thread::sleep(Duration::from_millis(1)); + cache.ping_query(); + + let updated_time = *cache.last_queried.read().unwrap(); + assert!(updated_time > initial_time); + } + + #[test] + fn test_ping_fetch_updates_last_fetched() { + let cache = HostCache::default(); + assert!(cache.last_fetched.read().unwrap().is_none()); + + cache.ping_fetch(); + + assert!(cache.last_fetched.read().unwrap().is_some()); + } + + #[test] + fn test_expired_when_never_fetched() { + let cache = HostCache::default(); + assert!(!cache.expired()); + } + + #[test] + fn test_expired_when_recently_fetched() { + let cache = HostCache::default(); + cache.ping_fetch(); + assert!(!cache.expired()); + } + + #[test] + fn test_is_idle_when_recently_queried() { + let cache = HostCache::default(); + cache.ping_query(); + assert!(!cache.is_idle()); + } + + #[test] + fn test_insert_host() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 4, ByteSize::gb(8)); + + cache.check_in(host.clone(), false); + + assert!(HOST_STORE.get(&host_id).is_some()); + let hosts_index = cache.hosts_index.read().unwrap(); + assert!(!hosts_index.is_empty()); + } + + #[test] + fn test_insert_host_updates_existing() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host1 = create_test_host(host_id, 4, ByteSize::gb(8)); + let mut host2 = create_test_host(host_id, 8, ByteSize::gb(16)); + host2.name = "updated-host".to_string(); + + cache.check_in(host1, false); + cache.check_in(host2.clone(), false); + + // The host should be updated with new resources + let stored_host = HOST_STORE.get(&host_id).unwrap(); + assert_eq!(stored_host.idle_cores.value(), 8); + 
assert_eq!(stored_host.idle_memory, ByteSize::gb(16)); + assert_eq!(stored_host.name, "updated-host"); + } + + #[test] + fn test_checkout_success() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 4, ByteSize::gb(8)); + + cache.check_in(host, false); + + let result = cache.check_out( + CoreSize(2), + ByteSize::gb(4), + |_| true, // Always validate true + ); + + let checked_out_host = assert_ok!(result); + let memory_key = HostCache::gen_memory_key(checked_out_host.idle_memory); + let core_key = checked_out_host.idle_cores.value() as u32; + + assert_eq!(checked_out_host.id, host_id); + + assert!(HOST_STORE.get(&host_id).is_none()); + + let hosts_index = cache.hosts_index.read().unwrap(); + let left_over_host = hosts_index + .get(&core_key) + .and_then(|hosts_by_memory| hosts_by_memory.get(&memory_key)) + .and_then(|hosts| hosts.get(&checked_out_host.id)); + assert!(left_over_host.is_none()) + } + + #[test] + fn test_checkout_no_candidate_available() { + let cache = HostCache::default(); + + let result = cache.check_out(CoreSize(4), ByteSize::gb(8), |_| true); + + assert!(result.is_err()); + assert!(matches!(result, Err(HostCacheError::NoCandidateAvailable))); + } + + #[test] + fn test_checkout_insufficient_cores() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 2, ByteSize::gb(8)); + + cache.check_in(host, false); + + let result = cache.check_out( + CoreSize(4), // Request more cores than available + ByteSize::gb(4), + |_| true, + ); + + assert!(result.is_err()); + } + + #[test] + fn test_checkout_insufficient_memory() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 4, ByteSize::gb(4)); + + cache.check_in(host, false); + + let result = cache.check_out( + CoreSize(2), + ByteSize::gb(8), // Request more memory than available + |_| true, + ); + + assert!(result.is_err()); + } + + #[test] + 
fn test_checkout_validation_fails() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 4, ByteSize::gb(8)); + + cache.check_in(host, false); + + let result = cache.check_out( + CoreSize(2), + ByteSize::gb(4), + |_| false, // Always fail validation + ); + + assert!(result.is_err()); + } + + #[test] + fn test_checkout_already_checked_out() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 4, ByteSize::gb(8)); + + cache.check_in(host, false); + + // First checkout should succeed + let result1 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + assert!(result1.is_ok()); + + // Second checkout should fail because host is already checked out + let result2 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + assert!(result2.is_err()); + } + + #[test] + fn test_checkin() { + let cache = HostCache::default(); + let host_id = Uuid::new_v4(); + let host = create_test_host(host_id, 4, ByteSize::gb(8)); + + cache.check_in(host.clone(), false); + + // Checkout the host + let mut checked_host = assert_ok!(cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true)); + assert_eq!(checked_host.idle_cores.value(), 4); + + // Reduce the number of cores and checkin to ensure cache is updated + checked_host.idle_cores = CoreSize(1); + + // Check it back in + cache.check_in(checked_host, false); + assert_err!(cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true)); + assert_ok!(cache.check_out(CoreSize(1), ByteSize::gb(4), |_| true)); + } + + #[test] + fn test_find_candidate_with_multiple_hosts() { + let cache = HostCache::default(); + + // Add hosts with different resources + let host1_id = Uuid::new_v4(); + let host1 = create_test_host(host1_id, 2, ByteSize::gb(4)); + + let host2_id = Uuid::new_v4(); + let host2 = create_test_host(host2_id, 4, ByteSize::gb(8)); + + let host3_id = Uuid::new_v4(); + let host3 = create_test_host(host3_id, 8, 
ByteSize::gb(16)); + + cache.check_in(host1, false); + cache.check_in(host2, false); + cache.check_in(host3, false); + + // Request 3 cores, 6GB - should get host2 (4 cores, 8GB) or host3 (8 cores, 16GB) + let result = cache.check_out(CoreSize(3), ByteSize::gb(6), |_| true); + assert!(result.is_ok()); + + let chosen_host = result.unwrap(); + assert!(chosen_host.idle_cores.value() >= 3); + assert!(chosen_host.idle_memory >= ByteSize::gb(6)); + } + + #[test] + fn test_gen_memory_key() { + // The memory key formula is: memory / CONFIG.host_cache.memory_key_divisor.as_u64() + // With default 2.1GB divisor: + // 4GB / 2.1GB = 1 (rounded down) + // 8GB / 2.1GB = 3 (rounded down) + let memory1 = ByteSize::gb(4); // 4GB + let memory2 = ByteSize::gb(8); // 8GB + + let key1 = HostCache::gen_memory_key(memory1); + let key2 = HostCache::gen_memory_key(memory2); + + // Keys should be different and deterministic + assert_ne!(key1, key2); + assert_eq!(key1, HostCache::gen_memory_key(memory1)); // Should be deterministic + + // With 2.1GB divisor, should get expected values + assert_eq!(key1, 1); // 4GB / 2.1GB = ~1.9, rounded down to 1 + assert_eq!(key2, 3); // 8GB / 2.1GB = ~3.8, rounded down to 3 + } + + #[test] + fn test_multiple_hosts_same_resources() { + let cache = HostCache::default(); + + // Add multiple hosts with same resource configuration + let host1_id = Uuid::new_v4(); + let host1 = create_test_host(host1_id, 4, ByteSize::gb(8)); + + let host2_id = Uuid::new_v4(); + let host2 = create_test_host(host2_id, 4, ByteSize::gb(8)); + + cache.check_in(host1, false); + cache.check_in(host2, false); + + // First checkout should succeed + let result1 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + assert!(result1.is_ok()); + + // Second checkout should also succeed (different host) + let result2 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + assert!(result2.is_ok()); + + // The hosts should be different + assert_ne!(result1.unwrap().id, 
result2.unwrap().id); + } +} diff --git a/rust/crates/scheduler/src/host_cache/messages.rs b/rust/crates/scheduler/src/host_cache/messages.rs new file mode 100644 index 000000000..20e2dad24 --- /dev/null +++ b/rust/crates/scheduler/src/host_cache/messages.rs @@ -0,0 +1,124 @@ +use actix::{Message, MessageResponse}; + +use bytesize::ByteSize; +use miette::Result; +use uuid::Uuid; + +use crate::{ + cluster_key::{ClusterKey, Tag}, + host_cache::HostCacheError, + models::{CoreSize, Host}, +}; + +/// Response containing a checked-out host and its associated cluster key. +/// +/// Returned when a host is successfully checked out from the cache. The cluster +/// key is needed to return the host to the correct cache group after use. +/// +/// # Fields +/// +/// * `0` - ClusterKey identifying the cache group this host belongs to +/// * `1` - Host with reserved resources +#[derive(MessageResponse)] +pub struct CheckedOutHost(pub ClusterKey, pub Host); + +/// Actor message to check out a host from the cache. +/// +/// Requests a host that matches the specified resource requirements and passes +/// the validation function. The cache will search through groups for each tag +/// in priority order (MANUAL > HOSTNAME > ALLOC) until a suitable host is found. +/// +/// If not found in cache, the service will fetch from the database. The host +/// is removed from the cache and must be checked back in after use. 
+/// +/// # Fields +/// +/// * `facility_id` - Facility identifier for the cluster key +/// * `show_id` - Show identifier for the cluster key +/// * `tags` - List of tags to search (tried in priority order) +/// * `cores` - Minimum number of cores required +/// * `memory` - Minimum memory required +/// * `validation` - Function to validate additional host requirements +/// +/// # Returns +/// +/// * `Ok(CheckedOutHost)` - Successfully found and reserved a matching host +/// * `Err(HostCacheError::NoCandidateAvailable)` - No host meets requirements +/// * `Err(HostCacheError::FailedToQueryHostCache)` - Database query failed +#[derive(Message)] +#[rtype(result = "Result")] +pub struct CheckOut +where + F: Fn(&Host) -> bool, +{ + pub facility_id: Uuid, + pub show_id: Uuid, + pub tags: Vec, + pub cores: CoreSize, + pub memory: ByteSize, + pub validation: F, +} + +/// Payload for checking in a host or invalidating a host in the cache. +/// +/// Allows either returning a host with updated resources to the cache or +/// invalidating a host by its id, removing it from the cache entirely. +/// +/// # Variants +/// +/// * `Host(Host)` - Return a host with updated idle resource counts +/// * `Invalidate(Uuid)` - Invalidate and remove a host by id +pub enum CheckInPayload { + Host(Host), + Invalidate(Uuid), +} + +/// Actor message to return a host to the cache or invalidate it. +/// +/// Returns a host back to its cache group with updated resource state, or +/// invalidates a host by id, removing it from the cache. When returning +/// a host, it is removed from the reservation list and becomes available for +/// checkout again. If the cache group has expired, the host is dropped. 
+/// +/// # Fields +/// +/// * `0` - ClusterKey identifying which cache group the host belongs to +/// * `1` - CheckInPayload specifying whether to return a host or invalidate by id +/// +/// # Returns +/// +/// * `()` - Operation completed successfully (host returned/invalidated or cache group expired) +#[derive(Message)] +#[rtype(result = "()")] +pub struct CheckIn(pub ClusterKey, pub CheckInPayload); + +/// Actor message to retrieve cache performance metrics. +/// +/// Requests the current cache hit/miss statistics from the HostCacheService. +/// Used for monitoring cache effectiveness. +/// +/// # Returns +/// +/// * `CacheRatioResponse` - Cache performance metrics +#[derive(Message)] +#[rtype(result = CacheRatioResponse)] +pub struct CacheRatio; + +/// Response containing cache performance statistics. +/// +/// Provides metrics about cache hit/miss rates for monitoring cache effectiveness. +/// A high hit ratio indicates the cache is effectively reducing database queries. +/// +/// # Fields +/// +/// * `hit` - Total number of cache hits (hosts found in cache) +/// * `miss` - Total number of cache misses (required database fetch) +/// * `hit_ratio` - Percentage of cache hits (0-100) +#[derive(MessageResponse)] +pub struct CacheRatioResponse { + #[allow(dead_code)] + pub hit: u64, + #[allow(dead_code)] + pub miss: u64, + pub hit_ratio: usize, +} diff --git a/rust/crates/scheduler/src/host_cache/mod.rs b/rust/crates/scheduler/src/host_cache/mod.rs new file mode 100644 index 000000000..f9824b751 --- /dev/null +++ b/rust/crates/scheduler/src/host_cache/mod.rs @@ -0,0 +1,78 @@ +mod actor; +mod cache; +pub mod messages; +mod store; + +use actix::{Actor, Addr}; +pub use cache::HostCache; + +use miette::Diagnostic; +use thiserror::Error; +use uuid::Uuid; + +use miette::Result; +use tokio::sync::OnceCell; +use tracing::error; + +use crate::host_cache::messages::CacheRatio; + +pub use actor::HostCacheService; + +pub type HostId = Uuid; + +static HOST_CACHE: OnceCell> = 
OnceCell::const_new(); + +/// Gets or initializes the singleton host cache service actor. +/// +/// Returns a shared reference to the HostCacheService actor, creating it +/// if it doesn't exist. The service manages host availability caching and +/// checkout/checkin operations. +/// +/// # Returns +/// +/// * `Ok(Addr)` - Actor address for sending messages +/// * `Err(miette::Error)` - Failed to initialize the service +pub async fn host_cache_service() -> Result> { + HOST_CACHE + .get_or_try_init(|| async { + let service = HostCacheService::new().await?.start(); + + Ok(service) + }) + .await + .cloned() +} + +/// Retrieves the current cache hit ratio as a percentage. +/// +/// Returns the ratio of cache hits to total cache accesses (hits + misses) +/// as a percentage value between 0 and 100. +/// +/// # Returns +/// +/// * `usize` - Cache hit ratio percentage (0-100), or 0 if service unavailable +#[allow(dead_code)] +pub async fn hit_ratio() -> usize { + let host_cache = host_cache_service().await; + match host_cache { + Ok(cache) => { + cache + .send(CacheRatio) + .await + .expect("Actor is offline") + .hit_ratio + } + Err(_) => 0, + } +} + +#[derive(Debug, Error, Diagnostic)] +pub enum HostCacheError { + #[error("No host found with the required resources")] + NoCandidateAvailable, + + #[error( + "Failed to query Host. Cache is functional, but can't probably load new values from the database" + )] + FailedToQueryHostCache(String), +} diff --git a/rust/crates/scheduler/src/host_cache/store.rs b/rust/crates/scheduler/src/host_cache/store.rs new file mode 100644 index 000000000..9653e71e1 --- /dev/null +++ b/rust/crates/scheduler/src/host_cache/store.rs @@ -0,0 +1,682 @@ +//! Thread-safe host data storage with optimistic concurrency control. +//! +//! This module provides a global host store that manages host information with +//! concurrent read/write access and timestamp-based conflict resolution. 
+ +use chrono::Utc; +use lazy_static::lazy_static; + +use scc::HashMap; +use tracing::debug; + +use crate::{config::CONFIG, host_cache::HostId, models::Host}; + +/// Thread-safe store for host data with concurrent access support. +/// +/// The `HostStore` uses `scc::HashMap` for lock-free concurrent operations. +/// +/// # Concurrency Model +/// +/// - **All operations** use lock-free atomic operations via `scc::HashMap` +/// - **Conflict resolution**: Timestamp-based optimistic concurrency control +/// +/// # Staleness Detection +/// +/// The store automatically detects and removes stale hosts during: +/// - `get()` operations - removes stale hosts on access +/// - `cleanup_stale_hosts()` - batch removal of all stale hosts +/// - `atomic_remove_if_valid()` - removes stale hosts regardless of validation +/// +/// Staleness threshold is configured via `CONFIG.host_cache.host_staleness_threshold`. +#[derive(Default)] +pub(super) struct HostStore { + /// Host actual data indexed by HostId + host_store: HashMap, +} + +impl HostStore { + /// Checks if a host is stale based on its last update timestamp. + /// + /// A host is considered stale if the time elapsed since its last update + /// exceeds the configured staleness threshold. + /// + /// # Arguments + /// + /// * `host` - The host to check for staleness + /// + /// # Returns + /// + /// * `true` - If the host is stale (age > threshold) + /// * `false` - If the host is still fresh or if duration conversion fails + /// + /// # Error Handling + /// + /// If the configured staleness threshold cannot be converted to a chrono Duration + /// (extremely unlikely in practice), defaults to zero duration, treating all hosts + /// as fresh. This prevents panics from malformed configuration. 
+ fn is_host_stale(host: &Host) -> bool { + let now = Utc::now(); + let age = now - host.last_updated; + let staleness_threshold = CONFIG.host_cache.host_staleness_threshold; + let staleness_duration = + chrono::Duration::from_std(staleness_threshold).unwrap_or_default(); + + let stale = age > staleness_duration; + if stale { + debug!("Host {} on the cache store is stale", host); + } + stale + } + + /// Retrieves a host by ID with automatic staleness detection and removal. + /// + /// Returns a cloned copy of the host if found and not stale, or `None` if not present + /// or if the host's last_updated timestamp exceeds the staleness threshold. + /// + /// # Arguments + /// + /// * `host_id` - The unique identifier of the host to retrieve + /// + /// # Returns + /// + /// * `Some(Host)` - A clone of the host data if found and not stale + /// * `None` - If no host with the given ID exists or if the host is stale + /// + /// # Staleness Check + /// + /// A host is considered stale if: + /// `current_time - host.last_updated > host_staleness_threshold` + /// + /// Stale hosts are automatically removed from the store when detected. + /// + /// # Concurrency & Race Conditions + /// + /// This operation uses lock-free reads followed by a lock-free removal if stale. + /// Between the staleness check and removal, another thread could: + /// - Update the host with fresh data (removal would fail or remove stale version) + /// - Remove the host (removal becomes a no-op) + /// + /// These races are benign - at worst, a fresh host might need to be re-inserted, + /// but staleness detection ensures no stale data is returned to the caller. 
+ pub fn get(&self, host_id: &HostId) -> Option { + let host = self + .host_store + .get_sync(host_id) + .map(|entry| entry.get().clone())?; + + // Check if the host is stale + if Self::is_host_stale(&host) { + // Host is stale, remove it from the store + self.remove(host_id); + return None; + } + + Some(host) + } + + /// Removes a host from the store by ID. + /// + /// # Arguments + /// + /// * `host_id` - The unique identifier of the host to remove + /// + /// # Returns + /// + /// * `Some(Host)` - The removed host data if it existed + /// * `None` - If no host with the given ID was found + /// + /// # Concurrency + /// + /// This operation uses lock-free atomic removal from the concurrent HashMap. + pub fn remove(&self, host_id: &HostId) -> Option { + self.host_store.remove_sync(host_id).map(|(_, host)| host) + } + + /// Atomically removes a host from the store only if it matches the expected state. + /// + /// This method implements atomic check-and-remove to prevent race conditions + /// where a host's state changes between lookup and removal operations. + /// + /// # Arguments + /// + /// * `host_id` - The unique identifier of the host to remove + /// * `expected_last_updated` - Expected timestamp to verify host hasn't changed + /// * `validation` - Additional validation function that must pass for removal + /// + /// # Returns + /// + /// * `Ok(Some(Host))` - Host was successfully removed and matched expectations + /// * `Ok(None)` - Host was not found in the store + /// * `Err(())` - Host exists but doesn't match expected state (timestamp mismatch or validation failure) + /// + /// # Staleness Handling + /// + /// Stale hosts are always removed regardless of timestamp or validation checks. + /// This ensures the store doesn't accumulate stale entries even when removal + /// attempts fail validation. 
+ /// + /// # Race Condition Prevention + /// + /// Uses atomic entry operations (`entry_sync()`) to ensure the host state + /// verification and removal happen atomically. The entry holds exclusive access + /// during the entire check-and-remove operation, preventing other threads from + /// modifying the host between validation and removal. + /// + /// # Typical Usage Pattern + /// + /// ```ignore + /// // Read host and remember timestamp + /// let host = HOST_STORE.get(&host_id)?; + /// let timestamp = host.last_updated; + /// + /// // ... perform some work ... + /// + /// // Atomically remove only if host hasn't changed + /// match HOST_STORE.atomic_remove_if_valid(&host_id, timestamp, |h| h.idle_cores >= needed) { + /// Ok(Some(host)) => /* successfully removed */, + /// Ok(None) => /* host disappeared */, + /// Err(()) => /* host changed, retry operation */, + /// } + /// ``` + pub fn atomic_remove_if_valid( + &self, + host_id: &HostId, + expected_last_updated: chrono::DateTime, + validation: F, + ) -> Result, ()> + where + F: FnOnce(&Host) -> bool, + { + match self.host_store.entry_sync(*host_id) { + scc::hash_map::Entry::Occupied(entry) => { + let host = entry.get(); + + // Check staleness first + if Self::is_host_stale(host) { + // Host is stale, remove it + let removed_host = entry.remove(); + return Ok(Some(removed_host)); + } + + // Verify host hasn't changed since we looked it up + if host.last_updated != expected_last_updated { + return Err(()); + } + + // Apply additional validation + if !validation(host) { + return Err(()); + } + + // All checks passed, atomically remove the host + let removed_host = entry.remove(); + Ok(Some(removed_host)) + } + scc::hash_map::Entry::Vacant(_) => Ok(None), + } + } + + /// Inserts or updates a host in the store with optimistic concurrency control. + /// + /// This method implements timestamp-based conflict resolution to prevent + /// stale data from overwriting newer updates. 
It's the primary method for + /// updating host state in the cache. + /// + /// # Arguments + /// + /// * `host` - The host data to insert or update + /// * `authoritative` - If `true`, bypasses timestamp checks and forces the update. + /// If `false`, only updates if the new data is newer than existing data. + /// + /// # Returns + /// + /// The final host data in the store after the operation: + /// * If inserted/updated: returns the new host data + /// * If rejected (stale): returns the existing newer host data + /// + /// # Conflict Resolution + /// + /// When `authoritative = false`: + /// - Compares `host.last_updated` with existing `last_updated` timestamp + /// - Rejects update if incoming data is older or equal (`existing >= new`) + /// - Returns existing data without modification + /// + /// When `authoritative = true`: + /// - Unconditionally updates the host data + /// - Used for authoritative sources like database loads or admin operations + /// + /// # Concurrency & Race Conditions + /// + /// This operation performs a lock-free read followed by a lock-free upsert. + /// Between the timestamp check and upsert, another thread could: + /// - Insert/update the same host with different data + /// - Remove the host from the store + /// + /// The race is handled by `upsert_sync()` which atomically updates the entry. + /// However, if multiple threads update concurrently, the last writer wins. + /// Callers relying on ordering should use `atomic_remove_if_valid()` for + /// stronger consistency guarantees. + /// + /// # Note on Non-Authoritative Updates + /// + /// Non-authoritative updates (`authoritative = false`) may be rejected if + /// the incoming data is older than what's already in the store. This prevents + /// out-of-order updates from overwriting fresher data, which can happen when + /// multiple update sources have different latencies. 
+ pub fn insert(&self, host: Host, authoritative: bool) -> Host { + // Ignore entries that are out of date + if let Some(existing_host) = self.host_store.get_sync(&host.id) { + if !authoritative && existing_host.last_updated >= host.last_updated { + return existing_host.get().clone(); + } + } + self.host_store + .upsert_sync(host.id, host.clone()) + .unwrap_or(host) + } + + /// Removes all stale hosts from the store in a batch operation. + /// + /// A host is considered stale if: + /// `current_time - host.last_updated > host_staleness_threshold` + /// + /// This method should be called periodically to clean up hosts that have + /// not been updated recently and are no longer active. It's more efficient + /// than relying solely on lazy removal via `get()` for large-scale cleanup. + /// + /// # Returns + /// + /// * `usize` - The number of stale hosts removed from the store + /// + /// # Implementation Details + /// + /// Uses a two-pass approach: + /// 1. First pass: iterate through all hosts to identify stale entries + /// 2. Second pass: remove identified stale hosts + /// + /// This avoids holding iteration locks during removal operations. + /// + /// # Concurrency & Race Conditions + /// + /// Between identifying stale hosts and removing them, concurrent operations may: + /// - Update a stale host with fresh data (removal becomes a no-op or removes old version) + /// - Remove hosts that we're about to remove (removal becomes a no-op) + /// - Insert new hosts (won't affect this cleanup operation) + /// + /// These races are benign. The worst case is redundant removal attempts + /// which are cheap no-ops. Fresh updates won't be incorrectly removed since + /// the store uses atomic operations. 
+ pub fn cleanup_stale_hosts(&self) -> usize { + let mut stale_host_ids = Vec::new(); + + // First pass: identify stale hosts + self.host_store.iter_sync(|host_id, host| { + if Self::is_host_stale(host) { + stale_host_ids.push(*host_id); + } + true + }); + + // Second pass: remove stale hosts + let removed_count = stale_host_ids.len(); + for host_id in stale_host_ids { + self.remove(&host_id); + } + + removed_count + } +} + +lazy_static! { + /// Global singleton instance of the host store. + /// + /// This provides a shared, thread-safe host store accessible throughout + /// the application. Initialized on first access using lazy initialization. + /// + /// # Usage + /// + /// ```ignore + /// use crate::host_cache::store::HOST_STORE; + /// + /// // Read a host + /// if let Some(host) = HOST_STORE.get(&host_id) { + /// println!("Found host: {:?}", host); + /// } + /// + /// // Update a host + /// HOST_STORE.insert(updated_host, false); + /// + /// // Remove a host + /// HOST_STORE.remove(&host_id); + /// ``` + /// + /// # Thread Safety + /// + /// Multiple threads can safely access this store concurrently. Read operations + /// can proceed in parallel, while write operations ensure exclusive access. 
+ pub(super) static ref HOST_STORE: HostStore = HostStore::default(); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::models::{CoreSize, Host}; + use bytesize::ByteSize; + use chrono::{Duration as ChronoDuration, Utc}; + use opencue_proto::host::ThreadMode; + use std::thread; + use uuid::Uuid; + + fn create_test_host_with_timestamp( + id: Uuid, + idle_cores: i32, + idle_memory: ByteSize, + last_updated: chrono::DateTime, + ) -> Host { + Host { + id, + name: format!("test-host-{}", id), + str_os: Some("Linux".to_string()), + total_cores: CoreSize(idle_cores), + total_memory: idle_memory, + idle_cores: CoreSize(idle_cores), + idle_memory, + idle_gpus: 0, + idle_gpu_memory: ByteSize::kb(0), + thread_mode: ThreadMode::Auto, + alloc_available_cores: CoreSize(idle_cores), + alloc_id: Uuid::new_v4(), + alloc_name: "test".to_string(), + last_updated, + } + } + + #[test] + fn test_cleanup_stale_hosts_removes_old_hosts() { + let store = HostStore::default(); + + // Create hosts with different ages + let now = Utc::now(); + let staleness_threshold = CONFIG.host_cache.host_staleness_threshold; + let staleness_duration = ChronoDuration::from_std(staleness_threshold).unwrap(); + + // Fresh host (within threshold) + let fresh_host_id = Uuid::new_v4(); + let fresh_host = create_test_host_with_timestamp( + fresh_host_id, + 4, + ByteSize::gb(8), + now - ChronoDuration::seconds(30), + ); + + // Stale host (beyond threshold) + let stale_host_id = Uuid::new_v4(); + let stale_host = create_test_host_with_timestamp( + stale_host_id, + 4, + ByteSize::gb(8), + now - staleness_duration - ChronoDuration::seconds(10), + ); + + // Very stale host (way beyond threshold) + let very_stale_host_id = Uuid::new_v4(); + let very_stale_host = create_test_host_with_timestamp( + very_stale_host_id, + 4, + ByteSize::gb(8), + now - staleness_duration - ChronoDuration::hours(1), + ); + + // Insert hosts using authoritative insert + store.insert(fresh_host.clone(), true); + 
store.insert(stale_host.clone(), true); + store.insert(very_stale_host.clone(), true); + + // Verify fresh host is present (doesn't trigger removal via get) + assert!(store.get(&fresh_host_id).is_some()); + + // Note: We don't call get() on stale hosts because get() automatically removes them. + // Instead, we directly verify the store contains them by checking the internal state. + // We'll verify removal works via cleanup_stale_hosts directly. + + // Run cleanup - should find and remove stale hosts + let removed_count = store.cleanup_stale_hosts(); + + // Should have removed 2 stale hosts + assert_eq!(removed_count, 2); + + // Fresh host should still be present + assert!(store.get(&fresh_host_id).is_some()); + + // Stale hosts should be removed + assert!(store.get(&stale_host_id).is_none()); + assert!(store.get(&very_stale_host_id).is_none()); + } + + #[test] + fn test_cleanup_stale_hosts_no_stale_hosts() { + let store = HostStore::default(); + + // Create only fresh hosts + let now = Utc::now(); + + let host1_id = Uuid::new_v4(); + let host1 = create_test_host_with_timestamp( + host1_id, + 4, + ByteSize::gb(8), + now - ChronoDuration::seconds(10), + ); + + let host2_id = Uuid::new_v4(); + let host2 = create_test_host_with_timestamp( + host2_id, + 4, + ByteSize::gb(8), + now - ChronoDuration::seconds(20), + ); + + store.insert(host1, true); + store.insert(host2, true); + + // Run cleanup + let removed_count = store.cleanup_stale_hosts(); + + // Should have removed 0 hosts + assert_eq!(removed_count, 0); + + // All hosts should still be present + assert!(store.get(&host1_id).is_some()); + assert!(store.get(&host2_id).is_some()); + } + + #[test] + fn test_cleanup_stale_hosts_empty_store() { + let store = HostStore::default(); + + // Run cleanup on empty store + let removed_count = store.cleanup_stale_hosts(); + + // Should have removed 0 hosts + assert_eq!(removed_count, 0); + } + + #[test] + fn test_get_removes_stale_host() { + let store = HostStore::default(); + 
+ // Create a stale host + let now = Utc::now(); + let staleness_threshold = CONFIG.host_cache.host_staleness_threshold; + let staleness_duration = ChronoDuration::from_std(staleness_threshold).unwrap(); + + let stale_host_id = Uuid::new_v4(); + let stale_host = create_test_host_with_timestamp( + stale_host_id, + 4, + ByteSize::gb(8), + now - staleness_duration - ChronoDuration::seconds(10), + ); + + store.insert(stale_host, true); + + // First get should detect staleness and remove the host + let result = store.get(&stale_host_id); + assert!(result.is_none()); + + // Second get should also return None + let result2 = store.get(&stale_host_id); + assert!(result2.is_none()); + } + + #[test] + fn test_cleanup_stale_hosts_concurrent() { + use std::sync::Arc; + + let store = Arc::new(HostStore::default()); + + // Create a mix of fresh and stale hosts + let now = Utc::now(); + let staleness_threshold = CONFIG.host_cache.host_staleness_threshold; + let staleness_duration = ChronoDuration::from_std(staleness_threshold).unwrap(); + + for i in 0..10 { + let host_id = Uuid::new_v4(); + let is_stale = i % 2 == 0; + let timestamp = if is_stale { + now - staleness_duration - ChronoDuration::seconds(10) + } else { + now - ChronoDuration::seconds(10) + }; + + let host = create_test_host_with_timestamp(host_id, 4, ByteSize::gb(8), timestamp); + store.insert(host, true); + } + + // Run cleanup from multiple threads + let handles: Vec<_> = (0..3) + .map(|_| { + let store_clone = Arc::clone(&store); + thread::spawn(move || store_clone.cleanup_stale_hosts()) + }) + .collect(); + + // Wait for all threads to complete + let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect(); + + // At least one thread should report cleaning up hosts + let total_removed: usize = results.iter().sum(); + assert!(total_removed > 0); + } + + #[test] + fn test_atomic_remove_if_valid_success() { + let store = HostStore::default(); + + // Create a test host + let host_id = 
Uuid::new_v4(); + let timestamp = Utc::now(); + let host = create_test_host_with_timestamp(host_id, 4, ByteSize::gb(8), timestamp); + + store.insert(host.clone(), true); + + // Atomic remove should succeed with correct timestamp and validation + let result = + store.atomic_remove_if_valid(&host.id, timestamp, |h| h.idle_cores.value() >= 4); + + assert!(matches!(result, Ok(Some(_)))); + if let Ok(Some(removed_host)) = result { + assert_eq!(removed_host.id, host.id); + } + + // Host should be gone from store + assert!(store.get(&host.id).is_none()); + } + + #[test] + fn test_atomic_remove_if_valid_timestamp_mismatch() { + let store = HostStore::default(); + + // Create a test host + let host_id = Uuid::new_v4(); + let timestamp = Utc::now(); + let host = create_test_host_with_timestamp(host_id, 4, ByteSize::gb(8), timestamp); + + store.insert(host.clone(), true); + + // Try to remove with wrong timestamp + let wrong_timestamp = timestamp - ChronoDuration::seconds(60); + let result = + store.atomic_remove_if_valid(&host.id, wrong_timestamp, |h| h.idle_cores.value() >= 4); + + // Should return an error + assert!(result.is_err()); + + // Host should still be in store + assert!(store.get(&host.id).is_some()); + } + + #[test] + fn test_atomic_remove_if_valid_validation_failure() { + let store = HostStore::default(); + + // Create a test host with 4 cores + let host_id = Uuid::new_v4(); + let timestamp = Utc::now(); + let host = create_test_host_with_timestamp(host_id, 4, ByteSize::gb(8), timestamp); + + store.insert(host.clone(), true); + + // Try to remove with validation requiring 8 cores + let result = store.atomic_remove_if_valid( + &host.id, + timestamp, + |h| h.idle_cores.value() >= 8, // This will fail + ); + + // Should return an error + assert!(result.is_err()); + + // Host should still be in store + assert!(store.get(&host.id).is_some()); + } + + #[test] + fn test_atomic_remove_if_valid_stale_host() { + let store = HostStore::default(); + + // Create a stale 
host + let host_id = Uuid::new_v4(); + let staleness_threshold = CONFIG.host_cache.host_staleness_threshold; + let staleness_duration = ChronoDuration::from_std(staleness_threshold).unwrap(); + let stale_timestamp = Utc::now() - staleness_duration - ChronoDuration::seconds(10); + + let host = create_test_host_with_timestamp(host_id, 4, ByteSize::gb(8), stale_timestamp); + store.insert(host.clone(), true); + + // Atomic remove should succeed and remove stale host regardless of validation + let result = store.atomic_remove_if_valid( + &host.id, + stale_timestamp, + |h| h.idle_cores.value() >= 8, // Would normally fail, but staleness overrides + ); + + assert!(matches!(result, Ok(Some(_)))); + if let Ok(Some(removed_host)) = result { + assert_eq!(removed_host.id, host.id); + } + + // Host should be gone from store + assert!(store.get(&host.id).is_none()); + } + + #[test] + fn test_atomic_remove_if_valid_nonexistent_host() { + let store = HostStore::default(); + + let nonexistent_id = Uuid::new_v4(); + let result = store.atomic_remove_if_valid(&nonexistent_id, Utc::now(), |_| true); + + assert!(matches!(result, Ok(None))); + } +} diff --git a/rust/crates/scheduler/src/lib.rs b/rust/crates/scheduler/src/lib.rs new file mode 100644 index 000000000..76e8cb50a --- /dev/null +++ b/rust/crates/scheduler/src/lib.rs @@ -0,0 +1,10 @@ +pub mod allocation; +pub mod cluster; +pub mod cluster_key; +pub mod config; +pub mod dao; +pub mod host_cache; +pub mod metrics; +pub mod models; +pub mod pgpool; +pub mod pipeline; diff --git a/rust/crates/scheduler/src/main.rs b/rust/crates/scheduler/src/main.rs new file mode 100644 index 000000000..a2f3bb079 --- /dev/null +++ b/rust/crates/scheduler/src/main.rs @@ -0,0 +1,264 @@ +use std::str::FromStr; + +use miette::{miette, Context, IntoDiagnostic}; +use structopt::StructOpt; +use tokio::signal::unix::{signal, SignalKind}; +use tracing_rolling_file::{RollingConditionBase, RollingFileAppenderBase}; +use 
tracing_subscriber::{layer::SubscriberExt, reload}; +use tracing_subscriber::{EnvFilter, Registry}; + +use crate::{ + cluster::{Cluster, ClusterFeed}, + cluster_key::{ClusterKey, Tag, TagType}, + config::CONFIG, +}; + +mod allocation; +mod cluster; +mod cluster_key; +mod config; +mod dao; +mod host_cache; +mod metrics; +mod models; +mod pgpool; +mod pipeline; + +// scheduler --facility eat --alloc_tags=show:tag,show:tag,show:tag --manual_tags=tag1,tag2 +#[derive(StructOpt, Debug)] +pub struct JobQueueCli { + #[structopt(long, short = "f", long_help = "Facility code to run on")] + facility: Option, + + #[structopt( + long, + short = "a", + long_help = "A list of show:tag entries associated to an allocation. (eg. show1:general)." + )] + alloc_tags: Vec, + + #[structopt( + long, + short = "t", + long_help = "A list of tags not associated with an allocation." + )] + manual_tags: Vec, + + #[structopt( + long, + short = "i", + long_help = "A list of tags to ignore when loading clusters." + )] + ignore_tags: Vec, +} + +#[derive(Debug, Clone)] +pub struct ColonSeparatedList(pub String, pub String); + +impl FromStr for ColonSeparatedList { + type Err = String; + + fn from_str(s: &str) -> Result { + let parts: Vec<&str> = s.split(":").map(|v| v.trim()).collect(); + if parts.len() != 2 { + return Err(format!("Invalid format: expected 'show:tag', got '{}'", s)); + } + Ok(ColonSeparatedList( + parts[0].to_string(), + parts[1].to_string(), + )) + } +} + +impl JobQueueCli { + async fn run(&self) -> miette::Result<()> { + // Merge CLI args with config, CLI takes precedence + let facility = if self.facility.is_some() { + self.facility.clone() + } else { + CONFIG.scheduler.facility.clone() + }; + + let alloc_tags = if !self.alloc_tags.is_empty() { + // CLI args provided, use them + self.alloc_tags.clone() + } else { + // Use config values + CONFIG + .scheduler + .alloc_tags + .iter() + .map(|at| ColonSeparatedList(at.show.clone(), at.tag.clone())) + .collect() + }; + + let 
manual_tags = if !self.manual_tags.is_empty() { + // CLI args provided, use them + self.manual_tags.clone() + } else { + // Use config values + CONFIG.scheduler.manual_tags.clone() + }; + + let ignore_tags = if !self.ignore_tags.is_empty() { + // CLI args provided, use them + self.ignore_tags.clone() + } else { + // Use config values + CONFIG.scheduler.ignore_tags.clone() + }; + + // Lookup facility_id from facility name + let facility_id = match &facility { + Some(facility) => Some( + cluster::get_facility_id(facility) + .await + .wrap_err("Invalid facility name")?, + ), + None => None, + }; + + let mut clusters = Vec::new(); + + if let Some(facility_id) = &facility_id { + // Build Cluster::ComposedKey for each alloc_tag (show:tag format) + for alloc_tag in &alloc_tags { + let show_id = cluster::get_show_id(&alloc_tag.0) + .await + .wrap_err("Could not find show {}.")?; + clusters.push(Cluster::ComposedKey(ClusterKey { + facility_id: *facility_id, + show_id, + tag: Tag { + name: alloc_tag.1.clone(), + ttype: TagType::Alloc, + }, + })); + } + + // Build Cluster::TagsKey for manual_tags + if !manual_tags.is_empty() { + clusters.push(Cluster::TagsKey( + *facility_id, + manual_tags + .iter() + .map(|name| Tag { + name: name.clone(), + ttype: TagType::Manual, + }) + .collect(), + )); + } + } else if !alloc_tags.is_empty() { + Err(miette!("Alloc tag requires a valid facility"))? + } + + let cluster_feed = if alloc_tags.is_empty() && manual_tags.is_empty() { + ClusterFeed::load_all(&facility_id, &ignore_tags).await? 
+ } else { + ClusterFeed::load_from_clusters(clusters, &ignore_tags) + }; + + pipeline::run(cluster_feed).await + } +} + +fn main() -> miette::Result<()> { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(CONFIG.queue.worker_threads) + .enable_all() + .build() + .into_diagnostic()?; + + // Spawn the actor system in the background + let actor_system = actix::System::with_tokio_rt(|| runtime); + + actor_system.block_on(async_main()) +} + +async fn async_main() -> miette::Result<()> { + let log_level = CONFIG.logging.level.as_str().to_lowercase(); + + // Use EnvFilter to suppress sqlx logs (set to warn so only warnings/errors show) + let filter = EnvFilter::new(log_level); + let (filter, reload_handle) = reload::Layer::new(filter); + + let stdout_log = tracing_subscriber::fmt::layer().pretty(); + let subs = Registry::default().with(stdout_log).with(filter); + + let file_appender_layer = if CONFIG.logging.file_appender { + let file_appender = RollingFileAppenderBase::new( + CONFIG.logging.path.clone(), + RollingConditionBase::new().max_size(1024 * 1024), + 7, + ) + .expect("Failed to create appender"); + let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender); + Some(tracing_subscriber::fmt::layer().with_writer(non_blocking)) + } else { + None + }; + let subs = subs.with(file_appender_layer); + + tracing::subscriber::set_global_default(subs).expect("Unable to set global subscriber"); + + // Start Prometheus metrics HTTP server in background + let metrics_addr = "0.0.0.0:9090"; + tokio::spawn(async move { + if let Err(e) = metrics::start_server(metrics_addr).await { + tracing::error!("Metrics server failed: {}", e); + } + }); + + // Watch for sigusr1 and sigusr2, when received toggle between info/debug levels + tokio::spawn(async move { + let mut sigusr1 = + signal(SignalKind::user_defined1()).expect("Failed to register signal listener"); + let mut sigusr2 = + signal(SignalKind::user_defined2()).expect("Failed to register 
signal listener"); + let mut is_info = CONFIG.logging.level.to_lowercase() == "info"; + loop { + tokio::select! { + _ = + sigusr1.recv() => { + + // Toggle log between info and DEBUG (keep sqlx at info so it doesn't show up) + is_info = !is_info; + let new_filter = if is_info { + EnvFilter::new("info,sqlx=info") + } else { + EnvFilter::new("debug,sqlx=info") + }; + reload_handle + .modify(|filter| { + *filter = new_filter; + }) + .ok(); + } + _ = + sigusr2.recv() => { + + // Toggle log between info and DEBUG (unlike sigusr1, also raise sqlx to debug for verbose SQL logging) + is_info = !is_info; + let new_filter = if is_info { + EnvFilter::new("info,sqlx=info") + } else { + EnvFilter::new("debug,sqlx=debug") + }; + reload_handle + .modify(|filter| { + *filter = new_filter; + }) + .ok(); + } + } + } + }); + + let opts = JobQueueCli::from_args(); + let result = opts.run().await; + + actix::System::current().stop(); + + result +} diff --git a/rust/crates/scheduler/src/metrics/mod.rs b/rust/crates/scheduler/src/metrics/mod.rs new file mode 100644 index 000000000..937d6a809 --- /dev/null +++ b/rust/crates/scheduler/src/metrics/mod.rs @@ -0,0 +1,149 @@ +use axum::{response::IntoResponse, routing::get, Router}; +use lazy_static::lazy_static; +use prometheus::{register_counter, register_histogram, Counter, Encoder, Histogram, TextEncoder}; +use std::time::Duration; +use tracing::{error, info}; + +lazy_static! 
{ + // Job metrics from entrypoint.rs + pub static ref JOBS_QUERIED_TOTAL: Counter = register_counter!( + "scheduler_jobs_queried_total", + "Total number of jobs queried from the database" + ) + .expect("Failed to register jobs_queried_total counter"); + + pub static ref JOBS_PROCESSED_TOTAL: Counter = register_counter!( + "scheduler_jobs_processed_total", + "Total number of jobs processed by the scheduler" + ) + .expect("Failed to register jobs_processed_total counter"); + + // Matcher metrics from matcher.rs + pub static ref NO_CANDIDATE_ITERATIONS_TOTAL: Counter = register_counter!( + "scheduler_no_candidate_iterations_total", + "Total number of NoCandidateAvailable iterations" + ) + .expect("Failed to register no_candidate_iterations_total counter"); + + pub static ref CANDIDATES_PER_LAYER: Histogram = register_histogram!( + "scheduler_candidates_per_layer", + "Histogram of candidates needed to fully consume a layer", + vec![1.0, 5.0, 10.0, 20.0, 50.0, 100.0] + ) + .expect("Failed to register candidates_per_layer histogram"); + + // Dispatcher metrics from dispatcher/actor.rs + pub static ref FRAMES_DISPATCHED_TOTAL: Counter = register_counter!( + "scheduler_frames_dispatched_total", + "Total number of frames dispatched" + ) + .expect("Failed to register frames_dispatched_total counter"); + + pub static ref TIME_TO_BOOK_SECONDS: Histogram = register_histogram!( + "scheduler_time_to_book_seconds", + "Time from frame updated_at until it got fully dispatched", + vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0] + ) + .expect("Failed to register time_to_book_seconds histogram"); + + // Job query metrics from dao/job_dao.rs + pub static ref JOB_QUERY_DURATION_SECONDS: Histogram = register_histogram!( + "scheduler_job_query_duration_seconds", + "Duration of job query operations", + vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0] + ) + .expect("Failed to register job_query_duration_seconds histogram"); +} + +/// Handler for the /metrics endpoint +async 
fn metrics_handler() -> impl IntoResponse { + let encoder = TextEncoder::new(); + let metric_families = prometheus::gather(); + let mut buffer = Vec::new(); + + match encoder.encode(&metric_families, &mut buffer) { + Ok(_) => { + let response = String::from_utf8(buffer).unwrap_or_else(|_| String::from("")); + ( + axum::http::StatusCode::OK, + [("content-type", "text/plain; version=0.0.4")], + response, + ) + } + Err(e) => { + error!("Failed to encode metrics: {}", e); + ( + axum::http::StatusCode::INTERNAL_SERVER_ERROR, + [("content-type", "text/plain")], + format!("Failed to encode metrics: {}", e), + ) + } + } +} + +/// Start the metrics HTTP server +/// +/// # Arguments +/// +/// * `addr` - The address to bind the server to (e.g., "0.0.0.0:9090") +/// +/// # Returns +/// +/// This function runs indefinitely and only returns if the server fails to start +pub async fn start_server(addr: &str) -> miette::Result<()> { + let app = Router::new().route("/metrics", get(metrics_handler)); + + let listener = tokio::net::TcpListener::bind(addr) + .await + .map_err(|e| miette::miette!("Failed to bind metrics server to {}: {}", addr, e))?; + + info!("Metrics server listening on http://{}/metrics", addr); + + axum::serve(listener, app) + .await + .map_err(|e| miette::miette!("Metrics server error: {}", e))?; + + Ok(()) +} + +/// Helper function to increment jobs queried counter +#[inline] +pub fn increment_jobs_queried(count: usize) { + JOBS_QUERIED_TOTAL.inc_by(count as f64); +} + +/// Helper function to increment jobs processed counter +#[inline] +pub fn increment_jobs_processed() { + JOBS_PROCESSED_TOTAL.inc(); +} + +/// Helper function to increment no candidate iterations counter +#[inline] +pub fn increment_no_candidate_iterations() { + NO_CANDIDATE_ITERATIONS_TOTAL.inc(); +} + +/// Helper function to observe candidates per layer +#[inline] +pub fn observe_candidates_per_layer(candidates: usize) { + CANDIDATES_PER_LAYER.observe(candidates as f64); +} + +/// Helper 
function to increment frames dispatched counter +#[inline] +pub fn increment_frames_dispatched() { + FRAMES_DISPATCHED_TOTAL.inc(); +} + +/// Helper function to observe time to book +#[inline] +pub fn observe_time_to_book(duration: Duration) { + TIME_TO_BOOK_SECONDS.observe(duration.as_secs_f64()); +} + +/// Helper function to observe job query duration +#[inline] +pub fn observe_job_query_duration(duration: Duration) { + JOB_QUERY_DURATION_SECONDS.observe(duration.as_secs_f64()); +} diff --git a/rust/crates/scheduler/src/models/core_size.rs b/rust/crates/scheduler/src/models/core_size.rs new file mode 100644 index 000000000..9c29c72ba --- /dev/null +++ b/rust/crates/scheduler/src/models/core_size.rs @@ -0,0 +1,139 @@ +/// A module to handle two different units used to represent cores: +/// `CoreSize` and `CoreSizeWithMultiplier`. +/// +/// In OpenCue's database, core counts are stored with a multiplier (typically 100, +/// configurable in the CueBot config file). For example, 1 core might be stored as 100. +/// +/// To simplify booking calculations, this multiplier is often ignored to avoid partial +/// bookings (fractions of a single core). However, mixing values with and without the +/// multiplier can lead to bugs in calculations. +/// +/// This module provides two distinct types that can be converted between each other +/// but cannot be directly used together in operations, preventing accidental mixing +/// of multiplied and non-multiplied values. 
+/// +use core::fmt; +use std::{ + cmp, + fmt::Display, + ops::{Add, Sub}, +}; + +use serde::{Deserialize, Serialize}; + +use crate::config::CONFIG; + +/// Size of a processing unit (# cores without multiplier) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct CoreSize(pub i32); + +/// Size of a processing unit with a multiplier (# cores with multiplier) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct CoreSizeWithMultiplier(pub i32); + +impl CoreSize { + /// Returns the raw core count value without multiplier. + /// + /// # Returns + /// + /// * `i32` - Number of cores + pub fn value(self) -> i32 { + self.0 + } + + /// Converts this CoreSize to CoreSizeWithMultiplier by applying the configured multiplier. + /// + /// # Returns + /// + /// * `CoreSizeWithMultiplier` - Core count with multiplier applied + #[allow(dead_code)] + pub fn with_multiplier(self) -> CoreSizeWithMultiplier { + self.into() + } + + /// Creates a CoreSize from a raw integer value that includes the multiplier. + /// + /// # Arguments + /// + /// * `size_with_multiplier` - Core count with multiplier already applied + /// + /// # Returns + /// + /// * `CoreSize` - Core count without multiplier + pub fn from_multiplied(size_with_multiplier: i32) -> CoreSize { + Self(size_with_multiplier / CONFIG.queue.core_multiplier as i32) + } +} + +impl CoreSizeWithMultiplier { + /// Returns the raw core count value with multiplier applied. 
+ /// + /// # Returns + /// + /// * `i32` - Number of cores multiplied by the configured multiplier + pub fn value(self) -> i32 { + self.0 + } +} + +impl From for CoreSizeWithMultiplier { + fn from(value: CoreSize) -> Self { + CoreSizeWithMultiplier(value.value() * CONFIG.queue.core_multiplier as i32) + } +} + +impl From for CoreSize { + fn from(value: CoreSizeWithMultiplier) -> Self { + CoreSize(value.value() / CONFIG.queue.core_multiplier as i32) + } +} + +impl Add for CoreSize { + type Output = CoreSize; + + fn add(self, rhs: Self) -> Self::Output { + Self(rhs.value() + self.value()) + } +} + +impl Add for CoreSizeWithMultiplier { + type Output = CoreSizeWithMultiplier; + + fn add(self, rhs: Self) -> Self::Output { + Self(rhs.value() + self.value()) + } +} + +impl Sub for CoreSize { + type Output = CoreSize; + + fn sub(self, rhs: Self) -> Self::Output { + Self(self.value() - rhs.value()) + } +} + +impl Sub for CoreSizeWithMultiplier { + type Output = CoreSizeWithMultiplier; + + fn sub(self, rhs: Self) -> Self::Output { + Self(self.value() - rhs.value()) + } +} + +impl cmp::Ord for CoreSize { + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.0.cmp(&other.0) + } +} + +impl cmp::PartialOrd for CoreSize { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Display for CoreSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/rust/crates/scheduler/src/models/frame.rs b/rust/crates/scheduler/src/models/frame.rs new file mode 100644 index 000000000..80df12953 --- /dev/null +++ b/rust/crates/scheduler/src/models/frame.rs @@ -0,0 +1,63 @@ +use std::{fmt::Display, time::SystemTime}; + +use bytesize::ByteSize; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::models::{core_size::CoreSize, fmt_uuid}; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct DispatchFrame { + // Entity fields + pub id: Uuid, + pub frame_name: String, + + // 
LayerEntity fields + pub show_id: Uuid, + pub facility_id: Uuid, + pub job_id: Uuid, + + // FrameEntity fields + pub layer_id: Uuid, + + // DispatchFrame specific fields + pub command: String, + pub range: String, + pub chunk_size: i32, + pub show_name: String, + pub shot: String, + pub user: String, + pub uid: Option, + pub log_dir: String, + pub layer_name: String, + pub job_name: String, + // Min cores can be a negative, representing `machine_total_cores - min_cores` + pub min_cores: CoreSize, + pub layer_cores_limit: Option, + pub threadable: bool, + pub has_selfish_service: bool, + pub min_gpus: u32, + pub min_gpu_memory: ByteSize, + pub min_memory: ByteSize, + // On Cuebot these fields come from constants, maybe replicate these constants here + // pub int_soft_memory_limit: i64, + // pub int_hard_memory_limit: i64, + pub services: Option, + pub os: Option, + pub loki_url: Option, + pub version: u32, + pub updated_at: SystemTime, +} + +impl Display for DispatchFrame { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}.{}.{}({})", + self.job_name, + self.layer_name, + self.frame_name, + fmt_uuid(&self.id) + ) + } +} diff --git a/rust/crates/scheduler/src/models/host.rs b/rust/crates/scheduler/src/models/host.rs new file mode 100644 index 000000000..24da1e222 --- /dev/null +++ b/rust/crates/scheduler/src/models/host.rs @@ -0,0 +1,90 @@ +use std::fmt::Display; + +use bytesize::ByteSize; +use chrono::{DateTime, Local, Utc}; +use opencue_proto::host::ThreadMode; +use uuid::Uuid; + +use crate::models::{core_size::CoreSize, fmt_uuid}; + +// TODO: Evaluate removing Clone and passing Host's reference around +#[derive(Clone, Debug)] +pub struct Host { + pub(crate) id: Uuid, + pub(crate) name: String, + pub(crate) str_os: Option, + pub(crate) total_cores: CoreSize, + pub(crate) total_memory: ByteSize, + pub(crate) idle_cores: CoreSize, + pub(crate) idle_memory: ByteSize, + pub(crate) idle_gpus: u32, + pub(crate) idle_gpu_memory: 
ByteSize, + pub(crate) thread_mode: ThreadMode, + pub(crate) alloc_available_cores: CoreSize, + pub(crate) alloc_id: Uuid, + pub(crate) alloc_name: String, + pub(crate) last_updated: DateTime, +} + +impl Host { + /// Creates a new Host instance for testing purposes. + /// + /// # Arguments + /// + /// * `id` - Host identifier + /// * `name` - Host name + /// * `str_os` - Operating system string + /// * `total_cores` - Total number of cores on the host + /// * `total_memory` - Total memory available on the host + /// * `idle_cores` - Number of idle cores + /// * `idle_memory` - Amount of idle memory + /// * `idle_gpus` - Number of idle GPUs + /// * `idle_gpu_memory` - Amount of idle GPU memory + /// * `thread_mode` - Threading mode configuration + /// * `alloc_available_cores` - Available cores for allocation + /// * `allocation_name` - Name of the allocation + /// + /// # Returns + /// + /// * `Host` - New host instance configured for testing + #[allow(dead_code)] + #[allow(dead_code, clippy::too_many_arguments)] + pub fn new_for_test( + id: Uuid, + name: String, + str_os: Option, + total_cores: CoreSize, + total_memory: ByteSize, + idle_cores: CoreSize, + idle_memory: ByteSize, + idle_gpus: u32, + idle_gpu_memory: ByteSize, + thread_mode: ThreadMode, + alloc_available_cores: CoreSize, + alloc_id: Uuid, + alloc_name: String, + ) -> Self { + Self { + id, + name, + str_os, + total_cores, + total_memory, + idle_cores, + idle_memory, + idle_gpus, + idle_gpu_memory, + thread_mode, + alloc_available_cores, + alloc_id, + alloc_name, + last_updated: Local::now().with_timezone(&Utc), + } + } +} + +impl Display for Host { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}({})", self.name, fmt_uuid(&self.id)) + } +} diff --git a/rust/crates/scheduler/src/models/job.rs b/rust/crates/scheduler/src/models/job.rs new file mode 100644 index 000000000..39946aeb8 --- /dev/null +++ b/rust/crates/scheduler/src/models/job.rs @@ -0,0 +1,20 @@ +use 
core::fmt; + +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::{cluster::Cluster, models::fmt_uuid}; + +/// Basic information to collect a job on the database for dispatching +#[derive(Serialize, Deserialize, Clone)] +pub struct DispatchJob { + pub id: Uuid, + pub int_priority: i32, + pub source_cluster: Cluster, +} + +impl fmt::Display for DispatchJob { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", fmt_uuid(&self.id)) + } +} diff --git a/rust/crates/scheduler/src/models/layer.rs b/rust/crates/scheduler/src/models/layer.rs new file mode 100644 index 000000000..443bfb95b --- /dev/null +++ b/rust/crates/scheduler/src/models/layer.rs @@ -0,0 +1,52 @@ +use core::fmt; +use std::collections::HashSet; + +use bytesize::ByteSize; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::models::{core_size::CoreSize, fmt_uuid, DispatchFrame}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct DispatchLayer { + pub id: Uuid, + pub job_id: Uuid, + pub facility_id: Uuid, + pub show_id: Uuid, + pub job_name: String, + pub layer_name: String, + pub str_os: Option, + pub cores_min: CoreSize, + pub mem_min: ByteSize, + pub threadable: bool, + pub gpus_min: i32, + pub gpu_mem_min: ByteSize, + pub tags: HashSet, + pub frames: Vec, +} + +impl fmt::Display for DispatchLayer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}.{}({})", + self.job_name, + self.layer_name, + fmt_uuid(&self.id) + ) + } +} + +impl DispatchLayer { + /// Removes frames with matching IDs from this layer's frame list. + /// + /// Used to clean up frames after dispatch attempts (both successful and failed) + /// to prevent livelock situations where frames are repeatedly retried. 
+ /// + /// # Arguments + /// + /// * `frame_ids` - Vector of frame IDs to remove from the layer + pub fn drain_frames(&mut self, frame_ids: Vec) { + self.frames.retain(|f| !frame_ids.contains(&f.id)) + } +} diff --git a/rust/crates/scheduler/src/models/mod.rs b/rust/crates/scheduler/src/models/mod.rs new file mode 100644 index 000000000..f1f723911 --- /dev/null +++ b/rust/crates/scheduler/src/models/mod.rs @@ -0,0 +1,32 @@ +mod core_size; +mod frame; +mod host; +mod job; +mod layer; +mod subscription; +mod virtual_proc; + +pub use core_size::{CoreSize, CoreSizeWithMultiplier}; +pub use frame::DispatchFrame; +pub use host::Host; +pub use job::DispatchJob; +pub use layer::DispatchLayer; +pub use subscription::{Allocation, Subscription}; +pub use virtual_proc::VirtualProc; + +use uuid::Uuid; + +/// Formats a UUID by returning only the first segment before the first hyphen. +/// +/// # Arguments +/// +/// * `id` - UUID reference +/// +/// # Returns +/// +/// * `String` - First segment of the UUID (8 characters) +pub fn fmt_uuid(id: &Uuid) -> String { + // Uuid::simple() returns a 32-character hex string without hyphens + // We take the first 8 characters which corresponds to the first segment + id.simple().to_string()[..8].to_string() +} diff --git a/rust/crates/scheduler/src/models/subscription.rs b/rust/crates/scheduler/src/models/subscription.rs new file mode 100644 index 000000000..969e22184 --- /dev/null +++ b/rust/crates/scheduler/src/models/subscription.rs @@ -0,0 +1,86 @@ +use uuid::Uuid; + +use crate::models::CoreSize; + +/// Represents a subscription linking an allocation to a show with resource limits. +/// +/// A subscription defines how many resources (cores, GPUs) from a specific allocation +/// are available for a particular show. It includes both base capacity (size) and +/// burst capacity for handling peak loads. +/// +/// This is the internal business logic representation, isolated from database schema changes. 
+#[allow(dead_code)] +#[derive(Debug, Clone)] +pub struct Subscription { + /// Unique subscription identifier + pub id: Uuid, + + /// Allocation ID that provides the resources + pub allocation_id: Uuid, + + /// Allocation Name that provides the resources + pub allocation_name: String, + + /// Show ID that can use the resources + pub show_id: Uuid, + + /// Base resource allocation size + pub size: i64, + + /// Additional burst capacity beyond base size (size included) + pub burst: CoreSize, + + /// Number of CPU cores allocated + pub booked_cores: CoreSize, + + /// Number of GPUs allocated + pub gpus: u32, +} + +/// Represents an allocation (resource pool) in the system. +/// +/// An allocation is a pool of compute resources that can be assigned to shows +/// through subscriptions. It represents a logical grouping of hosts and their +/// resources within a facility. +/// +/// This is the internal business logic representation, isolated from database schema changes. +#[allow(dead_code)] +#[derive(Debug, Clone)] +pub struct Allocation { + /// Unique allocation identifier + pub id: Uuid, + + /// Allocation name + pub name: String, + + /// Whether this allocation can be edited + pub allow_edit: bool, + + /// Whether this is the default allocation + pub is_default: bool, + + /// Optional tag for categorization + pub tag: Option, + + /// Whether usage is billable + pub billable: bool, + + /// Facility ID that owns this allocation + pub facility_id: Uuid, + + /// Whether this allocation is enabled + pub enabled: bool, +} + +impl Subscription { + fn is_frozen(&self) -> bool { + // Setting a subscription burst to 0 will freeze it + self.burst.value() <= 0 + } + + pub fn bookable(&self, cores_required: &CoreSize) -> bool { + !self.is_frozen() && + // Booking the amount requested should leave at least one core remaining + self.booked_cores.value() + cores_required.value() < self.burst.value() + } +} diff --git a/rust/crates/scheduler/src/models/virtual_proc.rs 
b/rust/crates/scheduler/src/models/virtual_proc.rs new file mode 100644 index 000000000..d1296c515 --- /dev/null +++ b/rust/crates/scheduler/src/models/virtual_proc.rs @@ -0,0 +1,38 @@ +use std::fmt::Display; + +use bytesize::ByteSize; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::models::{fmt_uuid, CoreSizeWithMultiplier, DispatchFrame}; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct VirtualProc { + pub proc_id: Uuid, + pub host_id: Uuid, + pub show_id: Uuid, + pub layer_id: Uuid, + pub job_id: Uuid, + pub frame_id: Uuid, + pub alloc_id: Uuid, + pub host_name: String, + pub cores_reserved: CoreSizeWithMultiplier, + pub memory_reserved: ByteSize, + pub gpus_reserved: u32, + pub gpu_memory_reserved: ByteSize, + pub os: String, + pub is_local_dispatch: bool, + pub frame: DispatchFrame, +} + +impl Display for VirtualProc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "(proc_id={}) {}->host={}", + fmt_uuid(&self.proc_id), + self.frame, + fmt_uuid(&self.host_id), + ) + } +} diff --git a/rust/crates/scheduler/src/pgpool.rs b/rust/crates/scheduler/src/pgpool.rs new file mode 100644 index 000000000..d380a6d5e --- /dev/null +++ b/rust/crates/scheduler/src/pgpool.rs @@ -0,0 +1,48 @@ +use std::{sync::Arc, time::Duration}; + +use miette::Result; +use sqlx::{postgres::PgPoolOptions, Pool, Postgres, Transaction}; +use tokio::sync::OnceCell; + +use crate::config::CONFIG; + +static CONNECTION_POOL: OnceCell>> = OnceCell::const_new(); + +/// Gets or initializes the global PostgreSQL connection pool. +/// +/// Uses configuration from `CONFIG.database` to establish pool settings including +/// max connections, timeouts, and connection URL. The pool is initialized once and +/// reused for all subsequent calls. 
+/// +/// # Returns +/// +/// * `Ok(Arc>)` - Shared reference to the connection pool +/// * `Err(sqlx::Error)` - Failed to create or connect to the pool +pub async fn connection_pool() -> Result>, sqlx::Error> { + let config = &CONFIG.database; + CONNECTION_POOL + .get_or_try_init(|| async { + let pool = PgPoolOptions::new() + .max_connections(config.pool_size) + .idle_timeout(Duration::from_secs(30)) + .acquire_timeout(Duration::from_secs(30)) + // 1 hour. (from_hours is still an experimental feature, + // see issue #140881 for more information) + .max_lifetime(Duration::from_secs(60 * 60)) + .connect(&config.connection_url()) + .await?; + Ok(Arc::new(pool)) + }) + .await + .map(Arc::clone) +} + +/// Begins a new database transaction from the connection pool. +/// +/// # Returns +/// +/// * `Ok(Transaction<'a, Postgres>)` - New database transaction +/// * `Err(sqlx::Error)` - Failed to begin transaction +pub async fn begin_transaction<'a>() -> Result, sqlx::Error> { + connection_pool().await?.begin().await +} diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs b/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs new file mode 100644 index 000000000..2222ae4bf --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs @@ -0,0 +1,1564 @@ +use actix::{Actor, ActorFutureExt, Handler, ResponseActFuture, WrapFuture}; +use bytesize::{ByteSize, KIB, MIB}; +use chrono::Utc; +use futures::FutureExt; +use miette::{miette, Context, IntoDiagnostic, Result}; +use moka::future::Cache; +use sqlx::{Postgres, Transaction}; +use std::{collections::HashMap, sync::Arc, time::Duration}; +use tonic::transport::Channel; +use tracing::{debug, error, info, trace, warn}; +use uuid::Uuid; + +use crate::{ + allocation::allocation_service, + config::CONFIG, + dao::{FrameDao, FrameDaoError, HostDao, LayerDao, ProcDao, UpdatedHostResources}, + metrics, + models::{CoreSize, DispatchFrame, DispatchLayer, Host, VirtualProc}, + pgpool::begin_transaction, + 
pipeline::dispatcher::{ + error::{DispatchError, DispatchVirtualProcError, VirtualProcError}, + frame_set::FrameSet, + messages::{DispatchLayerMessage, DispatchResult}, + }, +}; +use opencue_proto::{ + host::ThreadMode, + rqd::{rqd_interface_client::RqdInterfaceClient, RqdStaticLaunchFrameRequest, RunFrame}, +}; + +/// Actor wrapper for RqdDispatcher that provides message-based dispatch operations. +/// +/// This actor handles: +/// - Message-driven frame dispatching with proper error isolation +/// - gRPC connection management and pooling +/// - Database transaction coordination +/// - Supervision and retry logic for external service failures +#[derive(Clone)] +pub struct RqdDispatcherService { + frame_dao: Arc, + layer_dao: Arc, + host_dao: Arc, + proc_dao: Arc, + rqd_connection_cache: Cache>, + dry_run_mode: bool, +} + +impl Actor for RqdDispatcherService { + type Context = actix::Context; + + fn started(&mut self, _ctx: &mut Self::Context) { + info!("RqdDispatcherService actor started"); + } + + fn stopped(&mut self, _ctx: &mut Self::Context) { + info!("RqdDispatcherService actor stopped"); + } +} + +impl Handler for RqdDispatcherService { + type Result = ResponseActFuture>; + + fn handle(&mut self, msg: DispatchLayerMessage, _ctx: &mut Self::Context) -> Self::Result { + let DispatchLayerMessage { layer, host } = msg; + + let dispatcher = self.clone(); + debug!( + "Received dispatch message for layer {} on host {}", + layer.layer_name, host.name + ); + + Box::pin( + async move { + // Note: In a real implementation, we would need to coordinate with a transaction manager + // or pass the transaction through the message. For now, we'll create a new transaction + // within the dispatcher's database operations. 
+ + // Create a database transaction scope + let mut transaction = begin_transaction() + .await + .map_err(DispatchError::DbFailure)?; + + match dispatcher.dispatch(&layer, host, &mut transaction).await { + Ok((updated_host, updated_layer)) => { + // Commit the transaction + transaction + .commit() + .await + .map_err(DispatchError::DbFailure)?; + + Ok(DispatchResult { + updated_host, + updated_layer, + }) + } + Err(e) => { + // Rollback the transaction on error + if let Err(rollback_err) = transaction.rollback().await { + error!("Failed to rollback transaction: {}", rollback_err); + } + Err(e) + } + } + } + .into_actor(self) + .map(|result, _actor, _ctx| result), + ) + } +} + +impl RqdDispatcherService { + /// Creates a new RqdDispatcherService with the specified configuration. + /// + /// # Arguments + /// * `frame_dao` - Database access for frame operations + /// * `layer_dao` - Database access for layer operations + /// * `host_dao` - Database access for host operations and locking + /// * `dry_run_mode` - If true, logs dispatch actions without executing them + pub async fn new( + frame_dao: Arc, + layer_dao: Arc, + host_dao: Arc, + proc_dao: Arc, + dry_run_mode: bool, + ) -> Result { + let rqd_connection_cache = Cache::builder() + .max_capacity(100) + // 5 min. (from_hours is still an experimental feature) + .time_to_idle(Duration::from_secs(5 * 60)) + // 2 hours. (from_hours is still an experimental feature) + .time_to_live(Duration::from_secs(2 * 60 * 60)) + .build(); + + Ok(RqdDispatcherService { + frame_dao, + layer_dao, + host_dao, + proc_dao, + dry_run_mode, + rqd_connection_cache, + }) + } + + /// Dispatches a layer to a specific host with proper locking and error handling. + /// + /// The dispatch process: + /// 1. Acquires an exclusive lock on the target host + /// 2. Performs the actual dispatch operation + /// 3. 
Ensures the host lock is always released, even on panic or failure + /// + /// # Arguments + /// * `layer` - The layer containing frames to dispatch + /// * `host` - The target host for frame execution + /// + /// # Returns + /// * `Ok(())` on successful dispatch + /// * `Err(DispatchError)` on various failure conditions + async fn dispatch( + &self, + layer: &DispatchLayer, + host: Host, + transaction: &mut Transaction<'_, Postgres>, + ) -> Result<(Host, DispatchLayer), DispatchError> { + let host_id = host.id; + let host_disp = format!("{}", &host); + let layer_disp = format!("{}", &layer); + + // Acquire lock first + if !self + .host_dao + .lock(transaction, &host.id) + .await + .map_err(DispatchError::Failure)? + { + return Err(DispatchError::HostLock(host.name.clone())); + } + + // Ensure unlock is always called, regardless of panics or fails + let result = std::panic::AssertUnwindSafe(self.dispatch_inner(layer, host)) + .catch_unwind() + .await; + + // Always attempt to unlock, regardless of outcome. Failing to unlock here can be ignored as + // endint the transaction will automatically unlock. + if let Err(unlock_err) = self.host_dao.unlock(transaction, &host_id).await { + trace!("Failed to unlock host {}: {}", host_disp, unlock_err); + } + + // Handle the result from dispatch_inner + match result { + Ok(result) => { + if result.is_ok() { + info!( + "Successfully dispatched layer {} on {}.", + layer_disp, host_disp + ); + } + result + } + Err(_panic) => Err(DispatchError::Failure(miette!( + "Dispatch operation panicked for layer {} on host {}", + layer_disp, + host_id + ))), + } + } + + async fn dispatch_inner( + &self, + layer: &DispatchLayer, + host: Host, + ) -> Result<(Host, DispatchLayer), DispatchError> { + // A host should not book frames if its allocation is at or above its limit, + // but checking the limit before each frame is too costly. 
The tradeoff is + // to check the allocation state before entering the frame booking loop, + // with these there's a risk the allocation will go above burst, but not by + // a great margin as each loop only runs for a limited number of frames + // (see config queue.dispatch_frames_per_layer_limit) + let mut allocation_capacity = host.alloc_available_cores; + let allocation_name = host.alloc_name.clone(); + let mut last_host_version = host; + + let allocation_service = allocation_service().await.map_err(|err| { + DispatchError::Failure(err.wrap_err("Allocation Service is not available")) + })?; + // Use a closure for this validation to reduce the number of arguments that would be passed + // to dispatch_virtual_proc. + let is_subscription_bookable = |cores_requested| { + matches!(allocation_service.get_subscription(&allocation_name, &layer.show_id), + Some(subscription) if subscription.bookable(&cores_requested) + ) + }; + + // Deliberately cloning the layer to avoid requiring a mutable reference + let mut layer = layer.clone(); + let mut last_error = None; + let mut non_retrieable_frames = Vec::new(); + + // Use an unique id for all logs on this dispatch + let dispatch_id = Uuid::new_v4(); + + for frame in &layer.frames { + let frame_str = format!("{}", frame); + + let (virtual_proc, updated_host) = match Self::consume_host_virtual_resources( + frame, + &last_host_version, + CONFIG.queue.memory_stranded_threshold, + ) + .await + { + Ok(result) => result, + Err(VirtualProcError::HostResourcesExtinguished(msg)) => { + debug!( + "({dispatch_id}) Host resourse extinguished for {}. 
{}", + last_host_version, msg + ); + break; + } + }; + + debug!( + "({dispatch_id}) Host {} will have {} cores available after update", + updated_host.id, updated_host.idle_cores + ); + + // Each proc should run on its own transaction + let mut proc_transaction = begin_transaction() + .await + .map_err(DispatchError::DbFailure)?; + + // Before dispatching, confirm the layer still has limits + if !self + .layer_dao + .check_limits(&mut proc_transaction, &layer) + .await + .map_err(DispatchError::DbFailure)? + { + proc_transaction + .rollback() + .await + .map_err(DispatchError::DbFailure)?; + info!("({dispatch_id}) Skiping layer {}, reached limits", layer); + + // Skip the entire layer + break; + } + + match self + .dispatch_virtual_proc( + dispatch_id, + virtual_proc, + updated_host, + &mut proc_transaction, + &is_subscription_bookable, + allocation_capacity, + ) + .await + { + Ok((new_host, new_allocation_capacity)) => { + proc_transaction + .commit() + .await + .map_err(DispatchError::DbFailure)?; + + // Track successful frame dispatch + metrics::increment_frames_dispatched(); + + // Track time from frame updated_at to dispatch + if let Ok(elapsed) = frame.updated_at.elapsed() { + metrics::observe_time_to_book(elapsed); + } + + non_retrieable_frames.push(frame.id); + allocation_capacity = new_allocation_capacity; + last_host_version = new_host; + } + Err(err) => { + proc_transaction + .rollback() + .await + .map_err(DispatchError::DbFailure)?; + + match err { + DispatchVirtualProcError::AllocationOverBurst(err) => { + info!("({dispatch_id}) {frame_str} {err}"); + + last_error = Some(err); + break; + } + DispatchVirtualProcError::FailedToStartOnDb(err) => { + // Something is not right with this frame on the database. log error and give + // the next frame a chance. If there was already an error like this on previous + // frames, give up. + if last_error.is_some() { + break; + } + warn!( + "({dispatch_id}) Failed to start frame {} on Db. 
{}", + frame_str, err + ); + last_error = Some(err); + // IMPORTANT: Do NOT update last_host_version here since the transaction + // rolled back and we didn't actually consume any resources from the database. + // The next iteration should use the same host state as before. + continue; + } + DispatchVirtualProcError::FrameCouldNotBeUpdated => { + // The entire transaction is probably compromised, stop working on this layer + info!( + "({dispatch_id}) Frame {} couldn't be updated on the database. Version has changed.", + frame_str + ); + non_retrieable_frames.push(frame.id); + break; + } + DispatchVirtualProcError::RqdConnectionFailed { host, error } => { + // An error here means connection with this host is probably broken, + // there's no reason to attempt the next frame + warn!( + "({dispatch_id}) Failed to connect to rqd on {} to launch frame. {:?}", + host, error + ); + + break; + } + } + } + } + } + + if non_retrieable_frames.is_empty() { + info!( + "Found no frames on {} to dispatch to {}", + layer, last_host_version + ); + } else { + debug!( + "Dispatched {} frames on {}: ", + non_retrieable_frames.len(), + layer + ); + for proc in &non_retrieable_frames { + trace!("{}", proc); + } + } + + // Only drain successful frames. Failed frames will get retried on the next host candidate + if !non_retrieable_frames.is_empty() { + layer.drain_frames(non_retrieable_frames); + } + + if let Some(error) = last_error { + warn!("Wasn't able to dispatch all frames: {:?}", error) + } + Ok((last_host_version, layer)) + } + + /// Dispatches a virtual proc to a host, handling allocation checks, database updates, and RQD communication. + /// + /// This function encapsulates the complete dispatch process for a single virtual proc: + /// 1. Validates allocation capacity against subscription limits + /// 2. Updates frame status in the database + /// 3. Launches the frame on RQD (or logs in dry-run mode) + /// 4. 
Updates host resources and proc records in the database + /// + /// # Arguments + /// * `virtual_proc` - The virtual proc to dispatch + /// * `updated_host` - The host with updated resource allocations + /// * `transaction` - Database transaction for atomic updates + /// * `is_subscription_bookable` - Closure to check if subscription can accept more cores + /// * `allocation_capacity` - Current available allocation capacity + /// + /// # Returns + /// On success, returns a tuple of (updated host, new allocation capacity). + /// On failure, returns a `DispatchVirtualProcError` indicating the specific failure mode. + async fn dispatch_virtual_proc( + &self, + dispatch_id: Uuid, + virtual_proc: VirtualProc, + host: Host, + transaction: &mut Transaction<'_, Postgres>, + is_subscription_bookable: &impl Fn(CoreSize) -> bool, + allocation_capacity: CoreSize, + ) -> Result<(Host, CoreSize), DispatchVirtualProcError> { + trace!("({dispatch_id}) Built virtual proc {}", virtual_proc); + let cores_reserved_without_multiplier: CoreSize = virtual_proc.cores_reserved.into(); + + // Check allocation capacity in two ways + // - Check the cached subscription to account for external bookings + // - Check for cores consumed by this dispatcher iteration + if !is_subscription_bookable(cores_reserved_without_multiplier) + || cores_reserved_without_multiplier > allocation_capacity + { + return Err(DispatchVirtualProcError::AllocationOverBurst( + DispatchError::AllocationOverBurst(host.alloc_name.clone()), + )); + } + let new_allocation_capacity = allocation_capacity - virtual_proc.cores_reserved.into(); + + // Update database + let updated_resources = self + .update_database_for_dispatch(transaction, &virtual_proc, host.id, dispatch_id) + .await?; + + // When running on dry_run_mode, just log the outcome + if self.dry_run_mode { + debug!( + "(DRY_RUN) ({dispatch_id}) Dispatching {} on {}", + virtual_proc, &host + ); + } else { + self.launch_on_rqd(&virtual_proc, &host, true) + .await + 
.map_err(|err| DispatchVirtualProcError::RqdConnectionFailed { + host: host.to_string(), + error: miette!("{}", err), + })?; + } + + // Update the host struct with the actual database values after the update + // to ensure cache and database stay in sync + let mut updated_host = host; + updated_host.idle_cores = CoreSize::from_multiplied( + updated_resources + .cores_idle + .try_into() + .expect("db_cores_idle should fit in i32"), + ); + updated_host.idle_memory = ByteSize::kb(updated_resources.mem_idle as u64); + updated_host.idle_gpus = updated_resources + .gpus_idle + .try_into() + .expect("db_gpus_idle should fit in u32"); + updated_host.idle_gpu_memory = ByteSize::kb(updated_resources.gpu_mem_idle as u64); + updated_host.last_updated = updated_resources.last_updated; + + Ok((updated_host, new_allocation_capacity)) + } + + /// Updates database records for frame dispatch. + /// + /// This function performs three database operations atomically: + /// 1. Updates the frame status to "started" + /// 2. Inserts the virtual proc record + /// 3. Updates host resource allocations + /// + /// # Arguments + /// * `transaction` - Database transaction for atomic updates + /// * `virtual_proc` - The virtual proc being dispatched + /// * `host_id` - ID of the host receiving the dispatch + /// * `dispatch_id` - Unique identifier for this dispatch operation + /// + /// # Returns + /// On success, returns the updated host resources from the database. + /// On failure, returns a `DispatchVirtualProcError` indicating the specific failure mode. 
+ async fn update_database_for_dispatch( + &self, + transaction: &mut Transaction<'_, Postgres>, + virtual_proc: &VirtualProc, + host_id: Uuid, + dispatch_id: Uuid, + ) -> Result { + self.frame_dao + .update_frame_started(transaction, virtual_proc) + .await + .map_err(|err| match err { + FrameDaoError::FrameCouldNotBeUpdated => { + DispatchVirtualProcError::FrameCouldNotBeUpdated + } + FrameDaoError::DbFailure(err) => DispatchVirtualProcError::FailedToStartOnDb( + DispatchError::FailedToStartOnDb(err), + ), + })?; + + self.proc_dao + .insert(transaction, virtual_proc) + .await + .map_err(|(error, frame_id, host_id)| { + DispatchVirtualProcError::FailedToStartOnDb(DispatchError::FailedToCreateProc { + error, + frame_id, + host_id, + }) + })?; + + let updated_resources = self + .host_dao + .update_resources(transaction, &host_id, virtual_proc, dispatch_id) + .await + .map_err(|err| { + DispatchVirtualProcError::FailedToStartOnDb(DispatchError::FailedToUpdateResources( + err, + )) + })?; + + Ok(updated_resources) + } + + async fn launch_on_rqd( + &self, + virtual_proc: &VirtualProc, + host: &Host, + can_retry: bool, + ) -> Result<(), DispatchError> { + debug!("Dispatching {} on {}", virtual_proc, host); + + let run_frame = + Self::prepare_rqd_run_frame(virtual_proc).map_err(DispatchError::Failure)?; + debug!("Prepared run_frame for {}", virtual_proc); + + let request = RqdStaticLaunchFrameRequest { + run_frame: Some(run_frame), + }; + + let mut rqd_client = self + .get_rqd_connection(&host.name, CONFIG.rqd.grpc_port) + .await + .map_err(|err| DispatchError::FailureGrpcConnection(host.name.clone(), err))?; + + // Launch frame on rqd + match rqd_client.launch_frame(request).await { + Ok(_) => Ok(()), + Err(status) => { + match status.code() { + tonic::Code::Unauthenticated + | tonic::Code::Unavailable + | tonic::Code::Aborted + | tonic::Code::PermissionDenied + | tonic::Code::DeadlineExceeded + | tonic::Code::Internal + | tonic::Code::Unknown => { + warn!("Failed to 
launch on rqd. {}", status.message()); + // Invalidate entry to force a new connection on the next interaction + self.rqd_connection_cache.invalidate(&host.name).await; + + if can_retry { + // Retry once in case the cached connection was interrupted + Box::pin(self.launch_on_rqd(virtual_proc, host, false)).await + } else { + Err(DispatchError::GrpcFailure(status)) + } + } + _ => { + warn!("Unretrieable failure on rqd launch. {:?}", status.message()); + // Invalidate entry to force a new connection + self.rqd_connection_cache.invalidate(&host.name).await; + + Err(DispatchError::GrpcFailure(status)) + } + } + } + } + } + + /// Calculates the actual number of cores requested based on frame requirements. + /// + /// Handles special core request semantics: + /// - Negative values: Reserve all cores except the specified amount + /// - Zero: Reserve all cores on the host + /// - Positive values: Reserve the exact amount requested + /// + /// # Arguments + /// * `cores_requested` - The raw core request from the frame + /// * `total_cores` - Total cores available on the host + /// + /// # Returns + /// The calculated number of cores to actually request + fn calculate_cores_requested(cores_requested: CoreSize, total_cores: CoreSize) -> CoreSize { + // Requesting NEGATIVE cores is actually reserving ALL but the number of cores requeted + if cores_requested.value() < 0 { + total_cores + cores_requested + // Requesting ZERO cores is actually reserving ALL cores on the host + } else if cores_requested.value() == 0 { + total_cores + // Requesting POSITIVE cores + } else { + cores_requested + } + } + + /// Calculates the number of cores to reserve for a frame on a specific host. 
+ /// + /// Takes into account: + /// - Host thread mode (All, Variable, Auto) + /// - Frame threadability + /// - Memory requirements and stranded thresholds + /// - Selfish services and resource availability + /// + /// This method doesn't check if the host has resources available to fulfill the demand + /// + /// # Arguments + /// * `host` - The target host with available resources + /// * `frame` - The frame requiring resources + /// * `memory_stranded_threshold` - Threshold for memory-stranded frame detection + /// + /// # Returns + /// * CoreSize - Number of cores to reserve + fn calculate_core_reservation( + host: &Host, + frame: &DispatchFrame, + memory_stranded_threshold: ByteSize, + ) -> CoreSize { + let cores_requested = Self::calculate_cores_requested(frame.min_cores, host.total_cores); + + match (host.thread_mode, frame.threadable) { + (ThreadMode::All, _) => host.idle_cores, + // Limit Variable booking to at least 2 cores + (ThreadMode::Variable, true) if cores_requested.value() <= 2 => CoreSize(2), + (ThreadMode::Auto, true) | (ThreadMode::Variable, true) => { + // Book whatever is left for hosts with selfish services or memory stranded + if frame.has_selfish_service + || host + .idle_memory + .as_u64() + .saturating_sub(frame.min_memory.as_u64()) + <= memory_stranded_threshold.as_u64() + { + host.idle_cores + } else { + Self::calculate_memory_balanced_core_count(host, frame, cores_requested) + } + } + _ => cores_requested, + } + } + + /// Consumes a host(HostModel) and returns an updated version accounting for consumed resources + /// eg. 
+ /// HostModel(2 cores, 20GB) + frame(1 core, 10GB) + /// -> VirtualProc(1core, 10GB) + HostModel(1 core, 10GB) + async fn consume_host_virtual_resources( + frame: &DispatchFrame, + host: &Host, + memory_stranded_threshold: ByteSize, + ) -> Result<(VirtualProc, Host), VirtualProcError> { + let mut host = host.clone(); + + let cores_reserved = + Self::calculate_core_reservation(&host, frame, memory_stranded_threshold); + + if cores_reserved > host.total_cores || cores_reserved > host.idle_cores { + Err(VirtualProcError::HostResourcesExtinguished(format!( + "Not enough cores: {} < {}", + host.idle_cores, cores_reserved + )))? + } + + if host.idle_memory < frame.min_memory { + Err(VirtualProcError::HostResourcesExtinguished(format!( + "Not enough memory: {}mb < {}mb", + host.idle_memory.as_u64() / MIB, + frame.min_memory.as_u64() / MIB + )))? + } + + if host.idle_gpus < frame.min_gpus { + Err(VirtualProcError::HostResourcesExtinguished(format!( + "Not enough GPU cores: {} < {}", + host.idle_gpus, frame.min_gpus + )))? + } + + if host.idle_gpu_memory < frame.min_gpu_memory { + Err(VirtualProcError::HostResourcesExtinguished(format!( + "Not enough GPU memory: {}mb < {}mb", + host.idle_gpu_memory.as_u64() / MIB, + frame.min_gpu_memory.as_u64() / MIB + )))? 
+ } + + let memory_reserved = frame.min_memory; + let gpus_reserved = frame.min_gpus; + let gpu_memory_reserved = frame.min_gpu_memory; + + // Update host resources + host.idle_cores = host.idle_cores - cores_reserved; + host.idle_memory = ByteSize(host.idle_memory.as_u64() - memory_reserved.as_u64()); + host.idle_gpus -= gpus_reserved; + host.idle_gpu_memory = + ByteSize(host.idle_gpu_memory.as_u64() - gpu_memory_reserved.as_u64()); + // Field will be overwritten with database values as soon as the changes are committed + host.last_updated = Utc::now(); + + Ok(( + VirtualProc { + proc_id: Uuid::new_v4(), + host_id: host.id, + show_id: frame.show_id, + layer_id: frame.layer_id, + job_id: frame.job_id, + frame_id: frame.id, + alloc_id: host.alloc_id, + cores_reserved: cores_reserved.into(), + memory_reserved, + gpus_reserved, + gpu_memory_reserved, + os: host.str_os.clone().unwrap_or_default(), + is_local_dispatch: false, + frame: frame.clone(), + host_name: host.name.clone(), + }, + host, + )) + } + + /// Calculates a memory-balanced core count to prevent resource imbalance. + /// + /// Ensures that core allocation is proportional to memory requirements + /// to avoid situations where memory or cores become stranded. 
+ /// + /// # Arguments + /// * `host` - The host with available resources + /// * `frame` - The frame with memory and core requirements + /// * `cores_requested` - The number of cores originally requested + /// + /// # Returns + /// The balanced number of cores to allocate + fn calculate_memory_balanced_core_count( + host: &Host, + frame: &DispatchFrame, + cores_requested: CoreSize, + ) -> CoreSize { + let total_cores = host.total_cores.value() as f64; + let total_memory = host.total_memory.as_u64() as f64; + let frame_min_memory = frame.min_memory.as_u64() as f64; + + // Memory per core if evently distributed + let memory_per_core = total_memory / total_cores; + + // How many cores worth of memory the frame needs + let mut cores_worth_of_memory = (frame_min_memory / memory_per_core.round()) as i32; + + // If frame requested more than the memory-balanced core count, use frame's request + if cores_worth_of_memory < cores_requested.value() { + cores_worth_of_memory = cores_requested.value(); + } + // Don't book above max_core limit + if let Some(layer_cores_limit) = frame.layer_cores_limit { + if layer_cores_limit.value() > 0 && cores_worth_of_memory > layer_cores_limit.value() { + cores_worth_of_memory = layer_cores_limit.value(); + } + } + + CoreSize(cores_worth_of_memory) + } + + /// Calculate a new frame spec from an original frame_range and a chunk definition + /// + /// # Arguments + /// + /// * `initial_frame_number` - The starting frame number to begin the chunk from + /// * `frame_range` - A string representation of the frame range (e.g., "1-100") + /// * `chunk_size` - The number of frames to include in the chunk + /// + /// # Returns + /// + /// Returns a `Result` containing a tuple of: + /// * `String` - The frame specification string for the chunk + /// * `i32` - The last frame number in the chunk + /// + /// # Errors + /// + /// This function will return an error if: + /// * The frame range string is invalid + /// * The initial frame number is not 
within the specified range + /// * The chunk cannot be generated from the given parameters + /// * The chunk frame set is empty or invalid + fn prepare_frame_spec( + initial_frame_number: i32, + frame_range: &str, + chunk_size: usize, + ) -> Result<(String, i32)> { + let frame_set = FrameSet::new(frame_range)?; + let start_index = frame_set.index(initial_frame_number).ok_or(miette!( + "Invalid frame number {}. Out of range {}", + initial_frame_number, + frame_range + ))?; + let frame_spec = frame_set + .get_chunk(start_index, chunk_size) + .wrap_err("Invalid Chunk")?; + let chunk_frame_set = FrameSet::new(&frame_spec)?; + let chunk_end_frame = chunk_frame_set.last().ok_or(miette!( + "Could not find last frame of the chunk {}", + frame_spec + ))?; + + Ok((frame_spec, chunk_end_frame)) + } + + /// Prepares a RunFrame message for RQD execution. + /// + /// Converts a VirtualProc into the protobuf RunFrame format required by RQD, + /// including: + /// - Environment variable setup (CUE_*, frame metadata) + /// - Command token replacement (#FRAME#, #LAYER#, etc.) 
+ /// - Resource allocation specifications + /// - Frame timing and execution context + /// + /// A frame name shall follow the format [number]-[layer_name] + /// + /// # Arguments + /// * `proc` - The virtual proc containing frame and resource information + /// + /// # Returns + /// * `Ok(RunFrame)` - The prepared RQD RunFrame message + /// * `Err(miette::Error)` - If frame preparation fails + fn prepare_rqd_run_frame(proc: &VirtualProc) -> Result { + // Calculate threads from cores reserved + let proc_cores_reserved: CoreSize = proc.cores_reserved.into(); + let threads = std::cmp::max(CoreSize(1), proc_cores_reserved); + let frame = &proc.frame; + + // Extract frame number from frame name (assumes format "frameNumber-...") + let frame_number = frame + .frame_name + .split('-') + .next() + .and_then(|s| s.parse::().ok()) + .ok_or(miette!("Invalid Frame Number"))?; + + let z_frame_number = format!("{:04}", frame_number); + + let (frame_spec, chunk_end_frame) = + Self::prepare_frame_spec(frame_number, &frame.range, frame.chunk_size as usize)?; + + // Build environment variables + let mut environment = HashMap::new(); + environment.insert("CUE3".to_string(), "1".to_string()); + environment.insert("CUE_THREADS".to_string(), threads.to_string()); + environment.insert("CUE_MEMORY".to_string(), proc.memory_reserved.to_string()); + environment.insert("CUE_GPUS".to_string(), proc.gpus_reserved.to_string()); + environment.insert( + "CUE_GPU_MEMORY".to_string(), + proc.gpu_memory_reserved.to_string(), + ); + environment.insert("CUE_LOG_PATH".to_string(), frame.log_dir.clone()); + environment.insert("CUE_RANGE".to_string(), frame.range.clone()); + environment.insert("CUE_CHUNK".to_string(), frame.chunk_size.to_string()); + environment.insert("CUE_IFRAME".to_string(), frame_number.to_string()); + environment.insert("CUE_LAYER".to_string(), frame.layer_name.clone()); + environment.insert("CUE_JOB".to_string(), frame.job_name.clone()); + 
environment.insert("CUE_FRAME".to_string(), frame.frame_name.clone()); + environment.insert("CUE_SHOW".to_string(), frame.show_name.clone()); + environment.insert("CUE_SHOT".to_string(), frame.shot.clone()); + environment.insert("CUE_USER".to_string(), frame.user.clone()); + environment.insert("CUE_JOB_ID".to_string(), frame.job_id.to_string()); + environment.insert("CUE_LAYER_ID".to_string(), frame.layer_id.to_string()); + environment.insert("CUE_FRAME_ID".to_string(), frame.id.to_string()); + environment.insert( + "CUE_THREADABLE".to_string(), + if frame.threadable { "1" } else { "0" }.to_string(), + ); + + // Process command with token replacements + let processed_command = frame + .command + .replace("#ZFRAME#", &z_frame_number) + .replace("#IFRAME#", &frame_number.to_string()) + .replace("#FRAME_START#", &frame_number.to_string()) + .replace("#FRAME_END#", &chunk_end_frame.to_string()) + .replace("#FRAME_CHUNK#", &frame.chunk_size.to_string()) + .replace("#LAYER#", &frame.layer_name) + .replace("#JOB#", &frame.job_name) + .replace("#FRAMESPEC#", &frame_spec) + .replace("#FRAME#", &frame.frame_name); + + // Calculate memory limits on Kb + let soft_memory_limit = ((frame.min_memory.as_u64() / KIB) as f64 + * CONFIG.queue.frame_memory_soft_limit) + .round() as i64; + let hard_memory_limit = ((frame.min_memory.as_u64() / KIB) as f64 + * CONFIG.queue.frame_memory_hard_limit) + .round() as i64; + + // Build RunFrame + let run_frame = RunFrame { + shot: frame.shot.clone(), + show: frame.show_name.clone(), + user_name: frame.user.clone(), + log_dir: frame.log_dir.clone(), + job_id: frame.job_id.to_string(), + job_name: frame.job_name.clone(), + frame_id: frame.id.to_string(), + frame_name: frame.frame_name.clone(), + layer_id: frame.layer_id.to_string(), + resource_id: proc.proc_id.to_string(), + num_cores: proc.cores_reserved.value(), + num_gpus: proc.gpus_reserved as i32, + start_time: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| 
d.as_millis() as i64) + .unwrap_or(0), + ignore_nimby: proc.is_local_dispatch, + os: proc.os.clone(), + soft_memory_limit, + hard_memory_limit, + loki_url: frame.loki_url.as_ref().unwrap_or(&String::new()).clone(), + environment, + command: processed_command, + uid_optional: frame + .uid + .map(opencue_proto::rqd::run_frame::UidOptional::Uid), + frame_temp_dir: String::new(), // Will be set by RQD + gid: 0, // Will be set by RQD based on user + attributes: HashMap::new(), + children: None, + pid: 0, // Will be set by RQD + + // Deprecated fields + #[allow(deprecated)] + job_temp_dir: "deprecated".to_string(), + #[allow(deprecated)] + log_file: "deprecated".to_string(), + #[allow(deprecated)] + log_dir_file: "deprecated".to_string(), + }; + + Ok(run_frame) + } + + /// Establishes a gRPC connection to an RQD instance. + /// + /// # Arguments + /// * `hostname` - The hostname or IP address of the RQD instance + /// * `port` - The gRPC port number for the RQD service + /// + /// # Returns + /// * `Ok(RqdInterfaceClient)` - Connected gRPC client + /// * `Err(miette::Error)` - If connection fails + async fn get_rqd_connection( + &self, + hostname: &str, + port: u32, + ) -> Result> { + self.rqd_connection_cache + .entry(hostname.to_string()) + .or_optionally_insert_with(async { + // Build endpoint with timeout and keep-alive settings + let endpoint = Channel::from_shared(format!("http://{}:{}", hostname, port)) + .into_diagnostic() + .ok()? 
+ // Connection timeout - how long to wait when establishing connection + .connect_timeout(Duration::from_secs(10)) + // Request timeout - maximum time for a single request + .timeout(Duration::from_secs(30)) + // Keep-alive configuration + .http2_keep_alive_interval(Duration::from_secs(30)) + .keep_alive_timeout(Duration::from_secs(10)) + // Send keep-alive ping even when no active streams + .keep_alive_while_idle(true); + + RqdInterfaceClient::connect(endpoint) + .await + .into_diagnostic() + .ok() + }) + .await + .map(|e| e.into_value()) + .ok_or(miette!("Failed to connect to {} grpc server", hostname)) + } +} + +#[cfg(test)] +mod tests { + use std::time::SystemTime; + + use super::*; + use crate::models::{CoreSize, DispatchFrame, Host}; + use bytesize::ByteSize; + use opencue_proto::host::ThreadMode; + use uuid::Uuid; + + // Helper function to create a test host + fn create_test_host() -> Host { + Host::new_for_test( + Uuid::new_v4(), + "test-host".to_string(), + Some("linux".to_string()), + CoreSize(8), + ByteSize::gib(16), + CoreSize(4), + ByteSize::gib(8), + 1, + ByteSize::gib(4), + ThreadMode::Variable, + CoreSize(4), + Uuid::new_v4(), + "test-alloc".to_string(), + ) + } + + // Helper function to create a test dispatch frame + fn create_test_dispatch_frame() -> DispatchFrame { + DispatchFrame { + id: Uuid::new_v4(), + frame_name: "0001-test_frame".to_string(), + show_id: Uuid::new_v4(), + facility_id: Uuid::new_v4(), + job_id: Uuid::new_v4(), + layer_id: Uuid::new_v4(), + command: "echo 'test command'".to_string(), + range: "1-10".to_string(), + chunk_size: 1, + show_name: "test_show".to_string(), + shot: "test_shot".to_string(), + user: "test_user".to_string(), + uid: Some(1000), + log_dir: "/tmp/logs".to_string(), + layer_name: "test_layer".to_string(), + job_name: "test_job".to_string(), + min_cores: CoreSize(1), + layer_cores_limit: None, + threadable: true, + has_selfish_service: false, + min_gpus: 0, + min_gpu_memory: ByteSize::gb(0), + min_memory: 
ByteSize::gib(2),
            services: None,
            os: Some("linux".to_string()),
            loki_url: None,
            version: 1,
            updated_at: SystemTime::now(),
        }
    }

    // Positive request: returns exactly what was asked for.
    #[test]
    fn test_calculate_cores_requested_positive() {
        let result = RqdDispatcherService::calculate_cores_requested(CoreSize(4), CoreSize(8));
        assert_eq!(result, CoreSize(4));
    }

    // Zero request means "reserve ALL cores on the host".
    #[test]
    fn test_calculate_cores_requested_zero() {
        let result = RqdDispatcherService::calculate_cores_requested(CoreSize(0), CoreSize(8));
        assert_eq!(result, CoreSize(8));
    }

    // Negative request means "reserve all BUT that many cores".
    #[test]
    fn test_calculate_cores_requested_negative() {
        let result = RqdDispatcherService::calculate_cores_requested(CoreSize(-2), CoreSize(8));
        assert_eq!(result, CoreSize(6));
    }

    // ThreadMode::All always books every idle core, regardless of the frame.
    #[tokio::test]
    async fn test_calculate_core_reservation_thread_mode_all() {
        let mut host = create_test_host();
        host.thread_mode = ThreadMode::All;
        host.idle_cores = CoreSize(6);

        let frame = create_test_dispatch_frame();
        let memory_threshold = ByteSize::mib(500);

        let result =
            RqdDispatcherService::calculate_core_reservation(&host, &frame, memory_threshold);
        assert_eq!(result, CoreSize(6)); // Should return idle_cores
    }

    // Variable + threadable with a small request is floored at 2 cores.
    #[tokio::test]
    async fn test_calculate_core_reservation_variable_threadable_small_request() {
        let mut host = create_test_host();
        host.thread_mode = ThreadMode::Variable;

        let mut frame = create_test_dispatch_frame();
        frame.threadable = true;
        frame.min_cores = CoreSize(1);

        let memory_threshold = ByteSize::mib(500);

        let result =
            RqdDispatcherService::calculate_core_reservation(&host, &frame, memory_threshold);
        assert_eq!(result, CoreSize(2)); // Should return 2 cores minimum
    }

    // Non-threadable frames get exactly the requested core count.
    #[tokio::test]
    async fn test_calculate_core_reservation_not_threadable() {
        let host = create_test_host();
        let mut frame = create_test_dispatch_frame();
        frame.threadable = false;
        frame.min_cores = CoreSize(3);

        let memory_threshold = ByteSize::mib(500);

        let result =
            RqdDispatcherService::calculate_core_reservation(&host, &frame, memory_threshold);
        assert_eq!(result, CoreSize(3)); // Should return cores_requested
    }

    // calculate_core_reservation does not validate availability — that is
    // consume_host_virtual_resources's job.
    #[tokio::test]
    async fn test_calculate_core_reservation_insufficient_cores() {
        let mut host = create_test_host();
        host.idle_cores = CoreSize(2);

        let mut frame = create_test_dispatch_frame();
        frame.min_cores = CoreSize(10); // More than available

        let memory_threshold = ByteSize::mib(500);

        let result =
            RqdDispatcherService::calculate_core_reservation(&host, &frame, memory_threshold);
        // Method shouldn't check for resource availability
        assert_eq!(result, CoreSize(10));
    }

    #[test]
    fn test_calculate_memory_balanced_core_count_exact_calculation() {
        // Create a host with precise values to test the calculation
        let host = Host::new_for_test(
            Uuid::new_v4(),
            "test-host".to_string(),
            Some("linux".to_string()),
            CoreSize(8),       // 8 cores
            ByteSize::gib(16), // 16 GB total memory
            CoreSize(4),
            ByteSize::gib(8),
            1,
            ByteSize::gib(4),
            ThreadMode::Variable,
            CoreSize(4),
            Uuid::new_v4(),
            "test-alloc".to_string(),
        );

        let mut frame = create_test_dispatch_frame();
        frame.min_memory = ByteSize::gib(4); // Frame needs 4GB

        let cores_requested = CoreSize(1);

        let result = RqdDispatcherService::calculate_memory_balanced_core_count(
            &host,
            &frame,
            cores_requested,
        );

        // With 8 cores and 16GB, each core gets 2GB on average
        // Frame needs 4GB, so it should get 2 cores worth of memory
        // Since cores_requested (1) < cores_worth_of_memory (2), should return 2
        assert_eq!(result.value(), 2);
    }

    #[test]
    fn test_calculate_memory_balanced_core_count_high_memory_frame() {
        // Create a host with precise values
        let host = Host::new_for_test(
            Uuid::new_v4(),
            "test-host".to_string(),
            Some("linux".to_string()),
            CoreSize(4),      // 4 cores
            ByteSize::gib(8), // 8 GB total memory
            CoreSize(4),
            ByteSize::gib(8),
            1,
            ByteSize::gib(4),
            ThreadMode::Variable,
            CoreSize(4),
            Uuid::new_v4(),
            "test-alloc".to_string(),
        );

        let mut frame = create_test_dispatch_frame();
        frame.min_memory = ByteSize::gib(6); // Frame needs 6GB - more than half

        let cores_requested = CoreSize(1);

        let result = RqdDispatcherService::calculate_memory_balanced_core_count(
            &host,
            &frame,
            cores_requested,
        );

        // With 4 cores and 8GB, each core gets 2GB on average
        // Frame needs 6GB, so it should get 3 cores worth of memory
        // Since cores_requested (1) < cores_worth_of_memory (3), should return 3
        assert_eq!(result.value(), 3);
    }

    #[test]
    fn test_calculate_memory_balanced_core_count_low_memory_frame() {
        // Create a host with precise values
        let host = Host::new_for_test(
            Uuid::new_v4(),
            "test-host".to_string(),
            Some("linux".to_string()),
            CoreSize(8),       // 8 cores
            ByteSize::gib(32), // 32 GB total memory
            CoreSize(8),
            ByteSize::gib(32),
            1,
            ByteSize::gib(16),
            ThreadMode::Variable,
            CoreSize(8),
            Uuid::new_v4(),
            "test-alloc".to_string(),
        );

        let mut frame = create_test_dispatch_frame();
        frame.min_memory = ByteSize::gib(2); // Frame needs only 2GB

        let cores_requested = CoreSize(4); // But requests 4 cores

        let result = RqdDispatcherService::calculate_memory_balanced_core_count(
            &host,
            &frame,
            cores_requested,
        );

        // With 8 cores and 32GB, each core gets 4GB on average
        // Frame needs 2GB, so memory-wise it only needs 0.5 cores worth (rounds to 1)
        // Since cores_requested (4) > cores_worth_of_memory (1), should return cores_requested (4)
        assert_eq!(result.value(), 4);
    }

    // The layer_cores_limit caps the memory-balanced result.
    #[test]
    fn test_calculate_memory_balanced_core_count_with_layer_limit() {
        let host = Host::new_for_test(
            Uuid::new_v4(),
            "test-host".to_string(),
            Some("linux".to_string()),
            CoreSize(8),
            ByteSize::gib(16),
            CoreSize(8),
            ByteSize::gib(16),
            1,
            ByteSize::gib(8),
            ThreadMode::Variable,
            CoreSize(8),
            Uuid::new_v4(),
            "test-alloc".to_string(),
        );

        let mut frame = create_test_dispatch_frame();
        frame.layer_cores_limit = Some(CoreSize(2)); // Limit to 2 cores
        frame.min_memory = ByteSize::gib(8); // High memory requirement (would want 4 cores normally)

        let cores_requested = CoreSize(1);

        let result = RqdDispatcherService::calculate_memory_balanced_core_count(
            &host,
            &frame,
            cores_requested,
        );

        // With 8 cores and 16GB, each core gets 2GB
        // Frame needs 8GB, so memory-wise it needs 4 cores
        // But layer limit is 2, so should be capped at 2
        assert_eq!(result.value(), 2);
    }

    // Chunk of size 1 is just the single starting frame.
    #[test]
    fn test_prepare_frame_spec_basic() {
        let result = RqdDispatcherService::prepare_frame_spec(5, "1-10", 1);
        assert!(result.is_ok());
        let (frame_spec, last_frame) = result.unwrap();
        assert_eq!(frame_spec, "5");
        assert_eq!(last_frame, 5);
    }

    // Chunk of size 3 starting at 5 spans 5-7.
    #[test]
    fn test_prepare_frame_spec_chunk() {
        let result = RqdDispatcherService::prepare_frame_spec(5, "1-10", 3);
        assert!(result.is_ok());
        let (frame_spec, last_frame) = result.unwrap();
        assert_eq!(frame_spec, "5-7");
        assert_eq!(last_frame, 7);
    }

    // Starting frame outside the range is an error.
    #[test]
    fn test_prepare_frame_spec_invalid_frame() {
        let result = RqdDispatcherService::prepare_frame_spec(15, "1-10", 1);
        assert!(result.is_err());
    }

    // Unparseable range string is an error.
    #[test]
    fn test_prepare_frame_spec_invalid_range() {
        let result = RqdDispatcherService::prepare_frame_spec(5, "invalid-range", 1);
        assert!(result.is_err());
    }

    // Happy path: proc is created and all host resources are deducted.
    #[tokio::test]
    async fn test_consume_host_virtual_resources_success() {
        let frame = create_test_dispatch_frame();
        let host = create_test_host();
        let memory_stranded_threshold = ByteSize::gib(1);

        let result = RqdDispatcherService::consume_host_virtual_resources(
            &frame,
            &host,
            memory_stranded_threshold,
        )
        .await;

        assert!(result.is_ok());
        let (virtual_proc, updated_host) = result.unwrap();

        // Check virtual proc creation
        assert_eq!(virtual_proc.host_id, host.id);
        assert_eq!(virtual_proc.memory_reserved, frame.min_memory);
        assert_eq!(virtual_proc.gpus_reserved, frame.min_gpus);
        assert_eq!(virtual_proc.gpu_memory_reserved, frame.min_gpu_memory);
        assert_eq!(virtual_proc.frame.id, frame.id);
        // proc_id should be a valid UUID (non-nil)
        assert_ne!(virtual_proc.proc_id, Uuid::nil());

        // Check host resource consumption
        assert!(updated_host.idle_cores < host.idle_cores);
        assert_eq!(
            updated_host.idle_memory.as_u64(),
            host.idle_memory.as_u64() - frame.min_memory.as_u64()
        );
        assert_eq!(updated_host.idle_gpus, host.idle_gpus - frame.min_gpus);
        assert_eq!(
            updated_host.idle_gpu_memory.as_u64(),
            host.idle_gpu_memory.as_u64() - frame.min_gpu_memory.as_u64()
        );
    }

    // Requesting more memory than the host has yields HostResourcesExtinguished.
    #[tokio::test]
    async fn test_consume_host_virtual_resources_insufficient_memory() {
        let mut frame = create_test_dispatch_frame();
        frame.min_memory = ByteSize::gib(64); // More than host has
        frame.min_cores = CoreSize(1);
        let host = create_test_host();
        let memory_stranded_threshold = ByteSize::gib(1);

        let result = RqdDispatcherService::consume_host_virtual_resources(
            &frame,
            &host,
            memory_stranded_threshold,
        )
        .await;

        assert!(result.is_err());
        match result {
            Err(VirtualProcError::HostResourcesExtinguished(msg)) => {
                assert!(msg.contains("Not enough memory"));
            }
            _ => panic!("Expected HostResourcesExtinguished error for memory"),
        }
    }

    // Requesting more GPUs than the host has yields HostResourcesExtinguished.
    #[tokio::test]
    async fn test_consume_host_virtual_resources_insufficient_gpus() {
        let mut frame = create_test_dispatch_frame();
        frame.min_gpus = 4; // More than host has
        let host = create_test_host();
        let memory_stranded_threshold = ByteSize::gib(1);

        let result = RqdDispatcherService::consume_host_virtual_resources(
            &frame,
            &host,
            memory_stranded_threshold,
        )
        .await;

        assert!(result.is_err());
        match result {
            Err(VirtualProcError::HostResourcesExtinguished(msg)) => {
                assert!(msg.contains("Not enough GPU cores"));
            }
            _ => panic!("Expected HostResourcesExtinguished error for GPUs"),
        }
    }
#[tokio::test] + async fn test_consume_host_virtual_resources_insufficient_gpu_memory() { + let mut frame = create_test_dispatch_frame(); + frame.min_gpu_memory = ByteSize::gib(32); // More than host has + let host = create_test_host(); + let memory_stranded_threshold = ByteSize::gib(1); + + let result = RqdDispatcherService::consume_host_virtual_resources( + &frame, + &host, + memory_stranded_threshold, + ) + .await; + + assert!(result.is_err()); + match result { + Err(VirtualProcError::HostResourcesExtinguished(msg)) => { + assert!(msg.contains("Not enough GPU memory")); + } + _ => panic!("Expected HostResourcesExtinguished error for GPU memory"), + } + } + + #[test] + fn test_prepare_rqd_run_frame_basic() { + let frame = create_test_dispatch_frame(); + let virtual_proc = VirtualProc { + proc_id: Uuid::new_v4(), + host_id: Uuid::new_v4(), + show_id: Uuid::new_v4(), + layer_id: Uuid::new_v4(), + job_id: Uuid::new_v4(), + frame_id: Uuid::new_v4(), + alloc_id: Uuid::new_v4(), + cores_reserved: CoreSize(2).with_multiplier(), + memory_reserved: ByteSize::gib(4), + gpus_reserved: 1, + gpu_memory_reserved: ByteSize::gib(8), + os: "linux".to_string(), + is_local_dispatch: false, + frame, + host_name: "somehost".to_string(), + }; + + let result = RqdDispatcherService::prepare_rqd_run_frame(&virtual_proc); + + assert!(result.is_ok()); + let run_frame = result.unwrap(); + + // Check basic fields + assert_eq!(run_frame.frame_id, virtual_proc.frame.id.to_string()); + assert_eq!(run_frame.frame_name, virtual_proc.frame.frame_name); + assert_eq!(run_frame.job_name, virtual_proc.frame.job_name); + assert_eq!(run_frame.layer_id, virtual_proc.frame.layer_id.to_string()); + assert_eq!(run_frame.resource_id, virtual_proc.proc_id.to_string()); + assert_eq!(run_frame.num_cores, virtual_proc.cores_reserved.value()); + assert_eq!(run_frame.num_gpus, virtual_proc.gpus_reserved as i32); + assert_eq!(run_frame.os, virtual_proc.os); + assert_eq!(run_frame.ignore_nimby, 
virtual_proc.is_local_dispatch); + + // Check environment variables + assert_eq!(run_frame.environment.get("CUE3").unwrap(), "1"); + assert_eq!(run_frame.environment.get("CUE_THREADS").unwrap(), "2"); + assert_eq!( + run_frame.environment.get("CUE_MEMORY").unwrap(), + &virtual_proc.memory_reserved.to_string() + ); + assert_eq!(run_frame.environment.get("CUE_GPUS").unwrap(), "1"); + assert_eq!( + run_frame.environment.get("CUE_FRAME").unwrap(), + &virtual_proc.frame.frame_name + ); + assert_eq!( + run_frame.environment.get("CUE_JOB").unwrap(), + &virtual_proc.frame.job_name + ); + assert_eq!( + run_frame.environment.get("CUE_LAYER").unwrap(), + &virtual_proc.frame.layer_name + ); + assert_eq!( + run_frame.environment.get("CUE_SHOW").unwrap(), + &virtual_proc.frame.show_name + ); + assert_eq!( + run_frame.environment.get("CUE_USER").unwrap(), + &virtual_proc.frame.user + ); + assert_eq!( + run_frame.environment.get("CUE_RANGE").unwrap(), + &virtual_proc.frame.range + ); + } + + #[test] + fn test_prepare_rqd_run_frame_token_replacement() { + let mut frame = create_test_dispatch_frame(); + frame.command = + "render #ZFRAME# #IFRAME# #FRAME_START# #FRAME_END# #LAYER# #JOB# #FRAME#".to_string(); + frame.frame_name = "0005-test_frame".to_string(); + frame.range = "1-10".to_string(); // Ensure frame 5 is in range + + let virtual_proc = VirtualProc { + proc_id: Uuid::new_v4(), + host_id: Uuid::new_v4(), + show_id: Uuid::new_v4(), + layer_id: Uuid::new_v4(), + job_id: Uuid::new_v4(), + frame_id: Uuid::new_v4(), + alloc_id: Uuid::new_v4(), + cores_reserved: CoreSize(1).with_multiplier(), + memory_reserved: ByteSize::gib(2), + gpus_reserved: 0, + gpu_memory_reserved: ByteSize::gb(0), + os: "linux".to_string(), + is_local_dispatch: false, + frame, + host_name: "somehost".to_string(), + }; + + let result = RqdDispatcherService::prepare_rqd_run_frame(&virtual_proc); + + assert!(result.is_ok()); + let run_frame = result.unwrap(); + + // Check token replacements in command + let 
expected_command = "render 0005 5 5 5 test_layer test_job 0005-test_frame"; + assert_eq!(run_frame.command, expected_command); + + // Check frame number parsing and environment + assert_eq!(run_frame.environment.get("CUE_IFRAME").unwrap(), "5"); + } + + #[test] + fn test_prepare_rqd_run_frame_invalid_frame_name() { + let mut frame = create_test_dispatch_frame(); + frame.frame_name = "invalid-frame-name".to_string(); + + let virtual_proc = VirtualProc { + proc_id: Uuid::new_v4(), + host_id: Uuid::new_v4(), + show_id: Uuid::new_v4(), + layer_id: Uuid::new_v4(), + job_id: Uuid::new_v4(), + frame_id: Uuid::new_v4(), + alloc_id: Uuid::new_v4(), + cores_reserved: CoreSize(1).with_multiplier(), + memory_reserved: ByteSize::gib(2), + gpus_reserved: 0, + gpu_memory_reserved: ByteSize::gb(0), + os: "linux".to_string(), + is_local_dispatch: false, + frame, + host_name: "somehost".to_string(), + }; + + let result = RqdDispatcherService::prepare_rqd_run_frame(&virtual_proc); + assert!(result.is_err()); + } +} diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/error.rs b/rust/crates/scheduler/src/pipeline/dispatcher/error.rs new file mode 100644 index 000000000..411049c84 --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/dispatcher/error.rs @@ -0,0 +1,57 @@ +use miette::{Diagnostic, Error}; +use thiserror::Error; + +#[derive(Debug, Error, Diagnostic)] +pub enum VirtualProcError { + #[error("Failed to create Virtual Proc. 
Host resources extinguished.")] + HostResourcesExtinguished(String), +} + +#[derive(Debug, Error, Diagnostic)] +pub enum DispatchError { + #[error("DispatchError: Failed to acquire lock")] + HostLock(String), + + #[error("DispatchError: Unexpected Failure")] + Failure(Error), + + #[error("DispatchError: Unexpected Failure")] + DbFailure(sqlx::Error), + + #[error("DispatchError: Allocation over burst")] + AllocationOverBurst(String), + + #[error("DispatchError: Failed to update frame on the database")] + FailedToStartOnDb(sqlx::Error), + + #[error("DispatchError: Failed to create proc on database for frame={frame_id}, host={host_id}. {error:?}")] + FailedToCreateProc { + error: sqlx::Error, + frame_id: String, + host_id: String, + }, + + #[error("DispatchError: Failed to update proc resources on database")] + FailedToUpdateResources(Error), + + #[error("DispatchError: Failed to open a GRPC connection")] + FailureGrpcConnection(String, Error), + + #[error("DispatchError: Failed to execute command on GRPC interface")] + GrpcFailure(tonic::Status), +} + +#[derive(Debug, Error, Diagnostic)] +pub enum DispatchVirtualProcError { + #[error("Allocation over burst")] + AllocationOverBurst(DispatchError), + + #[error("Failed to start frame on database")] + FailedToStartOnDb(DispatchError), + + #[error("Failed to lock frame on database")] + FrameCouldNotBeUpdated, + + #[error("Failed to connect to RQD on host {host}")] + RqdConnectionFailed { host: String, error: Error }, +} diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/frame_set.rs b/rust/crates/scheduler/src/pipeline/dispatcher/frame_set.rs new file mode 100644 index 000000000..8f03d69b6 --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/dispatcher/frame_set.rs @@ -0,0 +1,970 @@ +//! Frame range parsing and manipulation for OpenCue job queue. +//! +//! This module provides functionality for parsing and manipulating frame ranges +//! commonly used in render farm job specifications. 
It supports various frame +//! range syntaxes including simple ranges, stepped ranges, inverse steps, and +//! interleaved patterns. +//! +//! # Frame Range Syntax +//! +//! The following syntax patterns are supported: +//! +//! - **Single frame**: `"5"` → `[5]` +//! - **Simple range**: `"1-10"` → `[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]` +//! - **Stepped range**: `"1-10x2"` → `[1, 3, 5, 7, 9]` +//! - **Inverse stepped**: `"1-10y3"` → `[2, 3, 5, 6, 8, 9]` (excludes every 3rd frame) +//! - **Negative step**: `"10-1x-2"` → `[10, 8, 6, 4, 2]` +//! - **Interleaved**: `"1-10:5"` → `[1, 6, 3, 5, 7, 9, 2, 4, 8, 10]` +//! +//! # Frame Set Syntax +//! +//! Multiple frame ranges can be combined with commas: +//! - `"1-5,10-15"` → `[1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15]` +//! - `"1-10x2,20,25-30"` → `[1, 3, 5, 7, 9, 20, 25, 26, 27, 28, 29, 30]` +//! +//! # Examples +//! +//! ```rust,ignore +//! use scheduler::pipeline::dispatcher::frame_set::{FrameRange, FrameSet}; +//! +//! // Parse a simple frame range +//! let range = FrameRange::new("1-10x2")?; +//! assert_eq!(range.get_all(), &[1, 3, 5, 7, 9]); +//! +//! // Parse a complex frame set +//! let frame_set = FrameSet::new("1-5,10-15x2")?; +//! assert_eq!(frame_set.get_all(), &[1, 2, 3, 4, 5, 10, 12, 14]); +//! +//! // Get a chunk for job distribution +//! let chunk = frame_set.get_chunk(2, 3)?; // Starting at index 2, size 3 +//! assert_eq!(chunk, "3-5"); +//! ``` + +use indexmap::IndexSet; +use miette::{Context, IntoDiagnostic, Result, miette}; +use regex::Regex; + +/// Represents a sequence of image frames parsed from a frame range specification. +/// +/// A `FrameRange` represents a single contiguous or patterned sequence of frame numbers. +/// It supports various syntaxes including simple ranges, stepped ranges, inverse steps, +/// and interleaved patterns. +/// +/// This is a direct port of the Java `FrameRange` class from OpenCue's codebot. 
+/// +/// # Supported Syntax +/// +/// - **Single frame**: `"42"` produces `[42]` +/// - **Simple range**: `"1-10"` produces `[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]` +/// - **Stepped range (x)**: `"1-10x3"` produces `[1, 4, 7, 10]` (every 3rd frame) +/// - **Inverse stepped (y)**: `"1-10y3"` produces `[2, 3, 5, 6, 8, 9]` (all except every 3rd) +/// - **Negative step**: `"10-1x-2"` produces `[10, 8, 6, 4, 2]` (backwards with step) +/// - **Interleaved (:)**: `"1-10:5"` produces interleaved pattern for render optimization +/// +/// # Validation Rules +/// +/// - Step size cannot be zero +/// - For positive steps, end frame must be >= start frame +/// - For negative steps, end frame must be <= start frame +/// - Step size and interleave size cannot be combined +/// +/// # Examples +/// +/// ```rust,ignore +/// // Basic usage +/// let range = FrameRange::new("1-10x2")?; +/// assert_eq!(range.size(), 5); +/// assert_eq!(range.get(0), Some(1)); +/// assert_eq!(range.get_all(), &[1, 3, 5, 7, 9]); +/// +/// // Inverse stepping +/// let inverse = FrameRange::new("1-10y3")?; +/// assert_eq!(inverse.get_all(), &[2, 3, 5, 6, 8, 9]); +/// ``` +#[derive(Debug, Clone, PartialEq)] +#[allow(dead_code)] +pub struct FrameRange { + frame_list: Vec, +} + +impl FrameRange { + /// Constructs a new `FrameRange` by parsing a frame range specification. 
+ /// + /// # Arguments + /// + /// * `frame_range` - A string specification following the frame range syntax + /// + /// # Returns + /// + /// * `Ok(FrameRange)` - Successfully parsed frame range + /// * `Err(String)` - Parse error with description + /// + /// # Examples + /// + /// ```rust,ignore + /// let range = FrameRange::new("1-10x2")?; + /// let single = FrameRange::new("42")?; + /// let inverse = FrameRange::new("1-10y3")?; + /// ``` + /// + /// # Errors + /// + /// Returns an error if: + /// - The syntax is invalid or unrecognized + /// - Step size is zero + /// - Step direction conflicts with range direction + /// - Frame numbers cannot be parsed as integers + #[allow(dead_code)] + pub fn new(frame_range: &str) -> Result { + let frame_list = Self::parse_frame_range(frame_range)?; + Ok(FrameRange { frame_list }) + } + + /// Gets the number of frames contained in this sequence. + /// + /// # Returns + /// + /// The total count of frames in the range. + /// + /// # Example + /// + /// ```rust,ignore + /// let range = FrameRange::new("1-10x2")?; + /// assert_eq!(range.size(), 5); // [1, 3, 5, 7, 9] + /// ``` + #[allow(dead_code)] + pub fn size(&self) -> usize { + self.frame_list.len() + } + + /// Gets an individual frame number by its position in the sequence. + /// + /// # Arguments + /// + /// * `idx` - Zero-based index into the frame sequence + /// + /// # Returns + /// + /// * `Some(frame_number)` - If the index is valid + /// * `None` - If the index is out of bounds + /// + /// # Example + /// + /// ```rust,ignore + /// let range = FrameRange::new("1-10x2")?; + /// assert_eq!(range.get(0), Some(1)); + /// assert_eq!(range.get(2), Some(5)); + /// assert_eq!(range.get(10), None); + /// ``` + #[allow(dead_code)] + pub fn get(&self, idx: usize) -> Option { + self.frame_list.get(idx).copied() + } + + /// Finds the index of a specific frame number in the sequence. 
+ /// + /// # Arguments + /// + /// * `frame` - The frame number to search for + /// + /// # Returns + /// + /// * `Some(index)` - Zero-based index if the frame is found + /// * `None` - If the frame is not in the sequence + /// + /// # Example + /// + /// ```rust,ignore + /// let range = FrameRange::new("1-10x2")?; + /// assert_eq!(range.index(5), Some(2)); + /// assert_eq!(range.index(4), None); // 4 is not in [1,3,5,7,9] + /// ``` + #[allow(dead_code)] + pub fn index(&self, frame: i32) -> Option { + self.frame_list.iter().position(|&x| x == frame) + } + + /// Gets a reference to the complete frame sequence as a slice. + /// + /// # Returns + /// + /// A slice containing all frame numbers in order. + /// + /// # Example + /// + /// ```rust,ignore + /// let range = FrameRange::new("1-5")?; + /// assert_eq!(range.get_all(), &[1, 2, 3, 4, 5]); + /// ``` + #[allow(dead_code)] + pub fn get_all(&self) -> &[i32] { + &self.frame_list + } + + /// Parses a frame range specification string into a vector of frame numbers. + /// + /// This is the core parsing logic that handles all supported syntax patterns. + /// It uses regex patterns to identify and parse different frame range formats. 
+ fn parse_frame_range(frame_range: &str) -> Result> { + let single_frame_pattern = Regex::new(r"^(-?\d+)$").unwrap(); + let simple_range_pattern = Regex::new(r"^(?P-?\d+)-(?P-?\d+)$").unwrap(); + let step_pattern = + Regex::new(r"^(?P-?\d+)-(?P-?\d+)(?P[xy])(?P-?\d+)$").unwrap(); + let interleave_pattern = + Regex::new(r"^(?P-?\d+)-(?P-?\d+):(?P-?\d+)$").unwrap(); + + if let Some(caps) = single_frame_pattern.captures(frame_range) { + let frame: i32 = caps + .get(1) + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err(format!("Invalid frame number: {}", frame_range))?; + return Ok(vec![frame]); + } + + if let Some(caps) = simple_range_pattern.captures(frame_range) { + let start_frame: i32 = caps + .name("sf") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid start frame".to_string())?; + let end_frame: i32 = caps + .name("ef") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid end frame".to_string())?; + let step = if end_frame >= start_frame { 1 } else { -1 }; + return Self::get_int_range(start_frame, end_frame, step); + } + + if let Some(caps) = step_pattern.captures(frame_range) { + let start_frame: i32 = caps + .name("sf") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid start frame".to_string())?; + let end_frame: i32 = caps + .name("ef") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid end frame".to_string())?; + let step: i32 = caps + .name("step") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid step".to_string())?; + let step_sep = caps.name("stepSep").unwrap().as_str(); + let inverse_step = step_sep == "y"; + return Self::get_stepped_range(start_frame, end_frame, step, inverse_step); + } + + if let Some(caps) = interleave_pattern.captures(frame_range) { + let start_frame: i32 = caps + .name("sf") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid start frame".to_string())?; + let 
end_frame: i32 = caps + .name("ef") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid end frame".to_string())?; + let step: i32 = caps + .name("step") + .unwrap() + .as_str() + .parse() + .into_diagnostic() + .wrap_err("Invalid step".to_string())?; + return Self::get_interleaved_range(start_frame, end_frame, step); + } + + Err(miette!("Unrecognized frame range syntax: {}", frame_range)) + } + + /// Generates an integer range with the specified start, end, and step values. + /// + /// This method handles the core logic for generating frame sequences, including + /// support for negative steps and proper filtering based on step intervals. + fn get_int_range(start: i32, end: i32, step: i32) -> Result> { + let (stream_start, stream_end) = if step < 0 { (end, start) } else { (start, end) }; + let stream_step = step.abs(); + + let mut result = Vec::new(); + let mut current = stream_start; + + while current <= stream_end { + if (current - start) % stream_step == 0 { + result.push(current); + } + current += 1; + } + + if step < 0 { + result.reverse(); + } + + Ok(result) + } + + /// Generates a stepped range, optionally with inverse stepping. + /// + /// For normal stepping (x syntax), returns frames at the specified intervals. + /// For inverse stepping (y syntax), returns all frames EXCEPT those at the intervals. 
+ /// + /// # Arguments + /// * `start` - Starting frame number + /// * `end` - Ending frame number + /// * `step` - Step interval + /// * `inverse_step` - If true, excludes stepped frames instead of including them + fn get_stepped_range(start: i32, end: i32, step: i32, inverse_step: bool) -> Result> { + Self::validate_step_sign(start, end, step)?; + let stepped_range = Self::get_int_range(start, end, step)?; + + if inverse_step { + let full_range = Self::get_int_range(start, end, if step < 0 { -1 } else { 1 })?; + let stepped_set: std::collections::HashSet<_> = stepped_range.into_iter().collect(); + let result: Vec = full_range + .into_iter() + .filter(|x| !stepped_set.contains(x)) + .collect(); + Ok(result) + } else { + Ok(stepped_range) + } + } + + /// Generates an interleaved frame sequence for render optimization. + /// + /// The interleaved pattern distributes frames across the range to provide + /// better early feedback during rendering. The algorithm progressively + /// halves the step size to fill in gaps. + /// + /// Example: "1-10:5" produces [1, 6, 3, 5, 7, 9, 2, 4, 8, 10] + fn get_interleaved_range(start: i32, end: i32, mut step: i32) -> Result> { + Self::validate_step_sign(start, end, step)?; + let mut interleaved_frames = IndexSet::new(); + + while step.abs() > 0 { + let range = Self::get_int_range(start, end, step)?; + for frame in range { + interleaved_frames.insert(frame); + } + step /= 2; + } + + Ok(interleaved_frames.into_iter().collect()) + } + + /// Validates that the step direction is compatible with the range direction. + /// + /// Ensures that positive steps are only used with ascending ranges and + /// negative steps are only used with descending ranges. Step size zero is invalid. 
+ fn validate_step_sign(start: i32, end: i32, step: i32) -> Result<()> { + if step > 1 { + if end < start { + Err(miette!( + "End frame may not be less than start frame when using a positive step" + )) + } else { + Ok(()) + } + } else if step == 0 { + Err(miette!("Step cannot be zero")) + } else if step < 0 && end >= start { + Err(miette!( + "End frame may not be greater than start frame when using a negative step" + )) + } else { + Ok(()) + } + } +} + +/// Represents an ordered sequence of FrameRanges combined into a single frame list. +/// +/// A `FrameSet` allows combining multiple frame range specifications using comma-separated +/// syntax. Each section is parsed as a `FrameRange` and the results are concatenated. +/// +/// This is a direct port of the Java `FrameSet` class from OpenCue's codebot. +/// +/// # Syntax +/// +/// Frame sets use comma-separated frame range specifications: +/// - `"1-10"` - Simple range: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +/// - `"1-5,10-15"` - Multiple ranges: [1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15] +/// - `"1-10x2,20,25-30"` - Mixed syntax: [1, 3, 5, 7, 9, 20, 25, 26, 27, 28, 29, 30] +/// - `"1-5x2, 10-15, 20"` - Whitespace is trimmed automatically +/// +/// # Job Distribution +/// +/// FrameSet provides chunking functionality for distributing frames across render nodes: +/// +/// ```rust,ignore +/// let frame_set = FrameSet::new("1-100")?; +/// let chunk1 = frame_set.get_chunk(0, 10)?; // "1-10" +/// let chunk2 = frame_set.get_chunk(10, 10)?; // "11-20" +/// ``` +/// +/// Chunks are returned as compact string representations that can be parsed by render nodes. 
+/// +/// # Examples +/// +/// ```rust,ignore +/// // Basic frame set +/// let frames = FrameSet::new("1-5,10-12")?; +/// assert_eq!(frames.get_all(), &[1, 2, 3, 4, 5, 10, 11, 12]); +/// assert_eq!(frames.size(), 8); +/// +/// // Complex frame set with different syntaxes +/// let complex = FrameSet::new("1-10x2,15,20-25")?; +/// assert_eq!(complex.get_all(), &[1, 3, 5, 7, 9, 15, 20, 21, 22, 23, 24, 25]); +/// +/// // Chunking for job distribution +/// let chunk = complex.get_chunk(0, 3)?; // First 3 frames +/// // Returns compact representation like "1-5x2,15" +/// ``` +#[derive(Debug, Clone, PartialEq)] +#[allow(dead_code)] +pub struct FrameSet { + frame_list: Vec, +} + +impl FrameSet { + /// Constructs a new `FrameSet` by parsing a comma-separated frame range specification. + /// + /// # Arguments + /// + /// * `frame_range` - Comma-separated frame range specifications + /// + /// # Returns + /// + /// * `Ok(FrameSet)` - Successfully parsed frame set + /// * `Err(String)` - Parse error with description + /// + /// # Examples + /// + /// ```rust,ignore + /// let simple = FrameSet::new("1-10")?; + /// let multi = FrameSet::new("1-5,10-15")?; + /// let complex = FrameSet::new("1-10x2, 20, 25-30")?; + /// ``` + pub fn new(frame_range: &str) -> Result { + let frame_list = Self::parse_frame_range(frame_range)?; + Ok(FrameSet { frame_list }) + } + + /// Gets the total number of frames in this frame set. + /// + /// # Returns + /// + /// The total count of frames across all ranges. + #[allow(dead_code)] + pub fn size(&self) -> usize { + self.frame_list.len() + } + + /// Gets an individual frame number by its position in the sequence. 
+ /// + /// # Arguments + /// + /// * `idx` - Zero-based index into the frame sequence + /// + /// # Returns + /// + /// * `Some(frame_number)` - If the index is valid + /// * `None` - If the index is out of bounds + #[allow(dead_code)] + pub fn get(&self, idx: usize) -> Option { + self.frame_list.get(idx).copied() + } + + /// Gets last individual frame number. + /// + /// # Returns + /// + /// * `Some(frame_number)` - If set not empty + /// * `None` - Otherwise + pub fn last(&self) -> Option { + self.frame_list.last().cloned() + } + + /// Finds the index of a specific frame number in the sequence. + /// + /// # Arguments + /// + /// * `frame` - The frame number to search for + /// + /// # Returns + /// + /// * `Some(index)` - Zero-based index if found + /// * `None` - If the frame is not in the set + pub fn index(&self, frame: i32) -> Option { + self.frame_list.iter().position(|&x| x == frame) + } + + /// Gets a reference to the complete frame sequence as a slice. + /// + /// # Returns + /// + /// A slice containing all frame numbers in the order they were specified. + #[allow(dead_code)] + pub fn get_all(&self) -> &[i32] { + &self.frame_list + } + + /// Returns a sub-FrameSet as a compact string representation for job distribution. + /// + /// This method is used to divide frame sets into smaller chunks for distribution + /// across render nodes. The returned string uses the most compact frame range + /// representation possible. 
+ /// + /// # Arguments + /// + /// * `start_frame_index` - Zero-based index of the first frame to include + /// * `chunk_size` - Maximum number of frames to include in the chunk + /// + /// # Returns + /// + /// * `Ok(String)` - Compact frame range representation (e.g., "1-10", "1,3,5", "10-20x2") + /// * `Err(String)` - If start_frame_index is out of bounds + /// + /// # Examples + /// + /// ```rust,ignore + /// let frames = FrameSet::new("1-20")?; + /// assert_eq!(frames.get_chunk(0, 5)?, "1-5"); + /// assert_eq!(frames.get_chunk(5, 5)?, "6-10"); + /// + /// let stepped = FrameSet::new("1-10x2")?; // [1, 3, 5, 7, 9] + /// assert_eq!(stepped.get_chunk(1, 3)?, "3-7x2"); // [3, 5, 7] + /// ``` + /// + /// # Errors + /// + /// Returns an error if `start_frame_index` is greater than or equal to the + /// total number of frames in the set. + pub fn get_chunk(&self, start_frame_index: usize, chunk_size: usize) -> Result { + if self.frame_list.len() <= start_frame_index { + Err(miette!( + "startFrameIndex {} is not in range 0-{}", + start_frame_index, + self.frame_list.len() - 1 + ))?; + } + + if chunk_size == 1 { + return Ok(self.frame_list[start_frame_index].to_string()); + } + + let final_frame_index = self.frame_list.len() - 1; + let mut end_frame_index = start_frame_index + chunk_size - 1; + if end_frame_index > final_frame_index { + end_frame_index = final_frame_index; + } + + let subset = &self.frame_list[start_frame_index..=end_frame_index]; + Ok(Self::frames_to_frame_ranges(subset)) + } + + /// Parses a comma-separated frame range specification into a vector of frame numbers. + /// + /// Each comma-separated section is parsed as an individual FrameRange and the + /// results are concatenated in order. 
+ fn parse_frame_range(frame_range: &str) -> Result> { + let mut result = Vec::new(); + for frame_range_section in frame_range.split(',') { + let section_frames = FrameRange::parse_frame_range(frame_range_section.trim())?; + result.extend(section_frames); + } + Ok(result) + } + + /// Builds a compact string representation for a frame range part. + /// + /// Returns the most compact representation: + /// - Single frame: "5" + /// - Consecutive frames: "1-10" + /// - Stepped frames: "1-10x2" + fn build_frame_part(start_frame: i32, end_frame: i32, step: i32) -> String { + if start_frame == end_frame { + start_frame.to_string() + } else if step == 1 { + format!("{}-{}", start_frame, end_frame) + } else { + format!("{}-{}x{}", start_frame, end_frame, step) + } + } + + /// Converts a list of frame numbers back to the most compact frame range representation. + /// + /// This method analyzes the frame sequence to detect patterns and produces + /// the most compact string representation possible. It's adapted from the + /// Python Fileseq library approach used in the original Java implementation. 
+ /// + /// # Arguments + /// + /// * `frames` - Slice of frame numbers in ascending order + /// + /// # Returns + /// + /// Compact frame range string (e.g., "1-10", "1-10x2", "1,3,5,10-15") + fn frames_to_frame_ranges(frames: &[i32]) -> String { + let l = frames.len(); + if l == 0 { + return String::new(); + } else if l == 1 { + return frames[0].to_string(); + } + + let mut result_parts = Vec::new(); + let mut curr_count = 1; + let mut curr_step = 0; + let mut curr_start = frames[0]; + let mut last_frame = frames[0]; + + for &curr_frame in frames.iter().skip(1) { + if curr_step == 0 { + curr_step = curr_frame - curr_start; + } + let new_step = curr_frame - last_frame; + + if curr_step == new_step { + last_frame = curr_frame; + curr_count += 1; + } else if curr_count == 2 && curr_step != 1 { + result_parts.push(curr_start.to_string()); + curr_step = 0; + curr_start = last_frame; + last_frame = curr_frame; + } else { + result_parts.push(Self::build_frame_part(curr_start, last_frame, curr_step)); + curr_step = 0; + curr_start = curr_frame; + last_frame = curr_frame; + curr_count = 1; + } + } + + if curr_count == 2 && curr_step != 1 { + result_parts.push(curr_start.to_string()); + result_parts.push(frames[frames.len() - 1].to_string()); + } else { + result_parts.push(Self::build_frame_part( + curr_start, + frames[frames.len() - 1], + curr_step, + )); + } + + result_parts.join(",") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Basic functionality tests + #[test] + fn test_single_frame() { + let frame_range = FrameRange::new("5").unwrap(); + assert_eq!(frame_range.get_all(), &[5]); + } + + #[test] + fn test_single_frame_negative() { + let frame_range = FrameRange::new("-5").unwrap(); + assert_eq!(frame_range.get_all(), &[-5]); + } + + #[test] + fn test_simple_range() { + let frame_range = FrameRange::new("1-5").unwrap(); + assert_eq!(frame_range.get_all(), &[1, 2, 3, 4, 5]); + } + + #[test] + fn test_simple_range_negative() { + let frame_range = 
FrameRange::new("-5--1").unwrap(); + assert_eq!(frame_range.get_all(), &[-5, -4, -3, -2, -1]); + } + + // Stepped range tests (x syntax) + #[test] + fn test_stepped_range_basic() { + let frame_range = FrameRange::new("1-10x2").unwrap(); + assert_eq!(frame_range.get_all(), &[1, 3, 5, 7, 9]); + } + + #[test] + fn test_stepped_range_documented_example() { + let frame_range = FrameRange::new("1-10x3").unwrap(); + assert_eq!(frame_range.get_all(), &[1, 4, 7, 10]); + } + + #[test] + fn test_stepped_range_step_of_one() { + let frame_range = FrameRange::new("1-5x1").unwrap(); + assert_eq!(frame_range.get_all(), &[1, 2, 3, 4, 5]); + } + + #[test] + fn test_stepped_range_large_step() { + let frame_range = FrameRange::new("1-10x5").unwrap(); + assert_eq!(frame_range.get_all(), &[1, 6]); + } + + // Negative stepped range tests + #[test] + fn test_negative_stepped_range() { + let frame_range = FrameRange::new("10-1x-1").unwrap(); + assert_eq!(frame_range.get_all(), &[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); + } + + #[test] + fn test_negative_stepped_range_with_step() { + let frame_range = FrameRange::new("10-1x-2").unwrap(); + assert_eq!(frame_range.get_all(), &[10, 8, 6, 4, 2]); + } + + // Inverse stepped range tests (y syntax) + #[test] + fn test_inverse_stepped_range_documented_example() { + let frame_range = FrameRange::new("1-10y3").unwrap(); + assert_eq!(frame_range.get_all(), &[2, 3, 5, 6, 8, 9]); + } + + #[test] + fn test_inverse_stepped_range_step_2() { + let frame_range = FrameRange::new("1-10y2").unwrap(); + assert_eq!(frame_range.get_all(), &[2, 4, 6, 8, 10]); + } + + #[test] + fn test_inverse_stepped_range_step_1() { + let frame_range = FrameRange::new("1-5y1").unwrap(); + assert_eq!(frame_range.get_all(), &[] as &[i32]); + } + + // Interleaved range tests (: syntax) + #[test] + fn test_interleaved_range_documented_example() { + let frame_range = FrameRange::new("1-10:5").unwrap(); + // Actual output from our implementation + assert_eq!(frame_range.get_all(), &[1, 6, 3, 5, 
7, 9, 2, 4, 8, 10]); + } + + #[test] + fn test_interleaved_range_step_2() { + let frame_range = FrameRange::new("1-8:2").unwrap(); + assert_eq!(frame_range.get_all(), &[1, 3, 5, 7, 2, 4, 6, 8]); + } + + #[test] + fn test_interleaved_range_step_4() { + let frame_range = FrameRange::new("1-8:4").unwrap(); + // Actual output from our implementation + assert_eq!(frame_range.get_all(), &[1, 5, 3, 7, 2, 4, 6, 8]); + } + + // Error cases and validation + #[test] + fn test_step_zero_error() { + let result = FrameRange::new("1-10x0"); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Step cannot be zero") + ); + } + + #[test] + fn test_positive_step_with_descending_range_error() { + let result = FrameRange::new("10-1x2"); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("End frame may not be less than start frame when using a positive step") + ); + } + + #[test] + fn test_negative_step_with_ascending_range_error() { + let result = FrameRange::new("1-10x-2"); + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains( + "End frame may not be greater than start frame when using a negative step" + ) + ); + } + + #[test] + fn test_invalid_syntax_error() { + let result = FrameRange::new("1-10z2"); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Unrecognized frame range syntax") + ); + } + + #[test] + fn test_malformed_range_error() { + let result = FrameRange::new("abc"); + assert!(result.is_err()); + } + + // FrameSet tests + #[test] + fn test_frame_set_simple() { + let frame_set = FrameSet::new("1-3,5-7").unwrap(); + assert_eq!(frame_set.get_all(), &[1, 2, 3, 5, 6, 7]); + } + + #[test] + fn test_frame_set_mixed_syntax() { + let frame_set = FrameSet::new("1-5x2,10,15-20").unwrap(); + assert_eq!(frame_set.get_all(), &[1, 3, 5, 10, 15, 16, 17, 18, 19, 20]); + } + + #[test] + fn test_frame_set_with_spaces() { + let frame_set = 
FrameSet::new("1-3, 5-7, 10").unwrap(); + assert_eq!(frame_set.get_all(), &[1, 2, 3, 5, 6, 7, 10]); + } + + #[test] + fn test_frame_set_single_frame() { + let frame_set = FrameSet::new("42").unwrap(); + assert_eq!(frame_set.get_all(), &[42]); + } + + // Chunk tests + #[test] + fn test_frame_set_get_chunk() { + let frame_set = FrameSet::new("1-10").unwrap(); + let chunk = frame_set.get_chunk(0, 3).unwrap(); + assert_eq!(chunk, "1-3"); + } + + #[test] + fn test_frame_set_get_chunk_single() { + let frame_set = FrameSet::new("1-10").unwrap(); + let chunk = frame_set.get_chunk(2, 1).unwrap(); + assert_eq!(chunk, "3"); + } + + #[test] + fn test_frame_set_get_chunk_end_of_range() { + let frame_set = FrameSet::new("1-10").unwrap(); + let chunk = frame_set.get_chunk(7, 5).unwrap(); // Should only get frames 8,9,10 + assert_eq!(chunk, "8-10"); + } + + #[test] + fn test_frame_set_get_chunk_out_of_bounds() { + let frame_set = FrameSet::new("1-5").unwrap(); + let result = frame_set.get_chunk(10, 3); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("startFrameIndex 10 is not in range 0-4") + ); + } + + #[test] + fn test_frame_set_get_chunk_stepped_frames() { + let frame_set = FrameSet::new("1-10x2").unwrap(); // [1, 3, 5, 7, 9] + let chunk = frame_set.get_chunk(1, 3).unwrap(); // Should get [3, 5, 7] + assert_eq!(chunk, "3-7x2"); + } + + // Frame range reconstruction tests + #[test] + fn test_frames_to_frame_ranges_simple() { + let frames = &[1, 2, 3, 5, 6, 7]; + let result = FrameSet::frames_to_frame_ranges(frames); + assert_eq!(result, "1-3,5-7"); + } + + #[test] + fn test_frames_to_frame_ranges_stepped() { + let frames = &[1, 3, 5, 7, 9]; + let result = FrameSet::frames_to_frame_ranges(frames); + assert_eq!(result, "1-9x2"); + } + + #[test] + fn test_frames_to_frame_ranges_single_frame() { + let frames = &[42]; + let result = FrameSet::frames_to_frame_ranges(frames); + assert_eq!(result, "42"); + } + + #[test] + fn 
test_frames_to_frame_ranges_empty() { + let frames = &[]; + let result = FrameSet::frames_to_frame_ranges(frames); + assert_eq!(result, ""); + } + + #[test] + fn test_frames_to_frame_ranges_mixed() { + let frames = &[1, 3, 5, 10, 11, 12]; + let result = FrameSet::frames_to_frame_ranges(frames); + assert_eq!(result, "1-5x2,10-12"); + } + + #[test] + fn test_frames_to_frame_ranges_single_gaps() { + let frames = &[1, 3, 5]; + let result = FrameSet::frames_to_frame_ranges(frames); + assert_eq!(result, "1-5x2"); + } + + // Edge cases + #[test] + fn test_frame_range_single_element_range() { + let frame_range = FrameRange::new("5-5").unwrap(); + assert_eq!(frame_range.get_all(), &[5]); + } + + #[test] + fn test_frame_range_backwards_single_step() { + let frame_range = FrameRange::new("5-1").unwrap(); + assert_eq!(frame_range.get_all(), &[5, 4, 3, 2, 1]); + } + + #[test] + fn test_complex_frame_set() { + let frame_set = FrameSet::new("1-5x2,10-15,20-30x3,50").unwrap(); + let expected = [1, 3, 5, 10, 11, 12, 13, 14, 15, 20, 23, 26, 29, 50]; + assert_eq!(frame_set.get_all(), &expected); + } +} diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/messages.rs b/rust/crates/scheduler/src/pipeline/dispatcher/messages.rs new file mode 100644 index 000000000..a77ec2805 --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/dispatcher/messages.rs @@ -0,0 +1,50 @@ +use actix::{Message, MessageResponse}; +use miette::Result; + +use crate::{ + models::{DispatchLayer, Host}, + pipeline::dispatcher::error::DispatchError, +}; + +/// Actor message to dispatch a layer's frames to a specific host. +/// +/// Sends a layer with its frames to the RqdDispatcherService actor for execution +/// on the specified host. 
The dispatcher will: +/// - Lock the host to prevent concurrent dispatches +/// - Book frames one by one until host resources are exhausted +/// - Update frame states in the database +/// - Communicate with RQD via gRPC to launch frames +/// - Return the updated host and layer state +/// +/// # Fields +/// +/// * `layer` - Layer containing frames to dispatch +/// * `host` - Target host with available resources +/// +/// # Returns +/// +/// * `Ok(DispatchResult)` - Successfully dispatched frames with updated state +/// * `Err(DispatchError)` - Dispatch failed due to various errors +#[derive(Message)] +#[rtype(result = "Result")] +pub struct DispatchLayerMessage { + pub layer: DispatchLayer, + pub host: Host, +} + +/// Response returned after a successful dispatch operation. +/// +/// Contains the updated state of both the host and layer after dispatching +/// frames. The host reflects consumed resources, and the layer has dispatched +/// frames removed from its frame list. +/// +/// # Fields +/// +/// * `updated_host` - Host with updated idle resource counts after dispatch +/// * `updated_layer` - Layer with dispatched frames removed from the frames list +/// (Note: dispatched frame names are not returned directly; they correspond to the frames removed from `updated_layer`.) +#[derive(MessageResponse, Debug)] +pub struct DispatchResult { + pub updated_host: Host, + pub updated_layer: DispatchLayer, +} diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/mod.rs b/rust/crates/scheduler/src/pipeline/dispatcher/mod.rs new file mode 100644 index 000000000..4e672f43d --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/dispatcher/mod.rs @@ -0,0 +1,74 @@ +pub mod actor; +pub mod error; +mod frame_set; +pub mod messages; + +use miette::Result; +use std::sync::Arc; + +// Actor and singleton support +use actix::{Actor, Addr}; +pub use actor::RqdDispatcherService; +use tokio::sync::OnceCell; + +use crate::dao::{LayerDao, ProcDao}; + +static RQD_DISPATCHER: OnceCell> = OnceCell::const_new(); + +/// Singleton 
getter for the RQD dispatcher service +/// +/// Creates and returns a singleton instance of the RqdDispatcherService actor. +/// The service is initialized with configuration from CONFIG on first access. +/// +/// # Usage Example +/// ```rust,ignore +/// use crate::pipeline::dispatcher::{rqd_dispatcher_service, messages::DispatchLayer}; +/// use crate::models::{DispatchLayer as ModelDispatchLayer, Host}; +/// +/// async fn dispatch_example() -> miette::Result<()> { +/// let dispatcher = rqd_dispatcher_service().await?; +/// +/// let message = DispatchLayer { +/// layer: my_layer, +/// host: my_host, +/// transaction_id: "tx-123".to_string(), +/// }; +/// +/// match dispatcher.send(message).await { +/// Ok(Ok(result)) => { +/// println!("Dispatched {} frames", result.dispatched_frames.len()); +/// } +/// Ok(Err(e)) => println!("Dispatch error: {}", e), +/// Err(e) => println!("Actor mailbox error: {}", e), +/// } +/// +/// Ok(()) +/// } +/// ``` +pub async fn rqd_dispatcher_service() -> Result, miette::Error> { + RQD_DISPATCHER + .get_or_try_init(|| async { + use crate::{ + config::CONFIG, + dao::{FrameDao, HostDao}, + }; + + let frame_dao = Arc::new(FrameDao::new().await?); + let layer_dao = Arc::new(LayerDao::new().await?); + let host_dao = Arc::new(HostDao::new().await?); + let proc_dao = Arc::new(ProcDao::new().await?); + + let service = RqdDispatcherService::new( + frame_dao, + layer_dao, + host_dao, + proc_dao, + CONFIG.rqd.dry_run_mode, + ) + .await?; + + Ok(service.start()) + }) + .await + .cloned() +} diff --git a/rust/crates/scheduler/src/pipeline/entrypoint.rs b/rust/crates/scheduler/src/pipeline/entrypoint.rs new file mode 100644 index 000000000..3bb8110ef --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/entrypoint.rs @@ -0,0 +1,119 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use futures::{stream, StreamExt}; +use tokio::sync::mpsc; +use tokio_stream::wrappers::ReceiverStream; +use 
tracing::{debug, error, info}; + +use crate::cluster::{Cluster, ClusterFeed, FeedMessage}; +use crate::config::CONFIG; +use crate::dao::JobDao; +use crate::metrics; +use crate::models::DispatchJob; +use crate::pipeline::MatchingService; + +/// Runs the scheduler feed loop, processing jobs for each cluster. +/// +/// Iterates through the cluster feed, fetching and processing jobs for each cluster. +/// Jobs are processed concurrently within configurable buffer sizes. The loop can +/// optionally terminate after a configured number of empty cycles. +/// +/// # Arguments +/// +/// * `cluster_feed` - Iterator over clusters to process +/// +/// # Returns +/// +/// * `Ok(())` - Scheduler completed successfully +/// * `Err(miette::Error)` - Fatal error occurred during processing +pub async fn run(cluster_feed: ClusterFeed) -> miette::Result<()> { + let job_fetcher = Arc::new(JobDao::new().await?); + let matcher = Arc::new(MatchingService::new().await?); + let cycles_without_jobs = Arc::new(AtomicUsize::new(0)); + info!("Starting scheduler feed"); + + let (tx, cluster_receiver) = mpsc::channel(16); + let feed_sender = cluster_feed.stream(tx).await; + + ReceiverStream::new(cluster_receiver) + .for_each_concurrent(CONFIG.queue.stream.cluster_buffer_size, |cluster| { + let job_fetcher = job_fetcher.clone(); + let matcher = matcher.clone(); + let cycles_without_jobs = cycles_without_jobs.clone(); + let feed_sender = feed_sender.clone(); + + async move { + let jobs = match &cluster { + Cluster::ComposedKey(cluster_key) => { + job_fetcher + .query_pending_jobs_by_show_facility_tag( + cluster_key.show_id, + cluster_key.facility_id, + cluster_key.tag.to_string(), + ) + .await + } + Cluster::TagsKey(facility_id, tags) => { + job_fetcher + .query_pending_jobs_by_tags( + tags.iter().map(|v| v.to_string()).collect(), + *facility_id, + ) + .await + } + }; + + match jobs { + Ok(jobs) => { + // Track number of jobs queried + metrics::increment_jobs_queried(jobs.len()); + + let 
processed_jobs = AtomicUsize::new(0); + stream::iter(jobs) + .for_each_concurrent( + CONFIG.queue.stream.job_buffer_size, + |job_model| async { + processed_jobs.fetch_add(1, Ordering::Relaxed); + metrics::increment_jobs_processed(); + let job = DispatchJob::new(job_model, cluster.clone()); + debug!("Found job: {}", job); + matcher.process(job).await; + }, + ) + .await; + // If no jobs got processed, sleep to prevent hammering the database with + // queries with no outcome + if processed_jobs.load(Ordering::Relaxed) == 0 { + let _ = feed_sender + .send(FeedMessage::Sleep(cluster, Duration::from_secs(3))) + .await; + } + + // If empty_jobs_cycles_before_quiting is set, quit if nothing got processed + if let Some(limit) = CONFIG.queue.empty_job_cycles_before_quiting { + // Count cycles that couldn't find any job + if processed_jobs.load(Ordering::Relaxed) == 0 { + cycles_without_jobs.fetch_add(1, Ordering::Relaxed); + } else { + cycles_without_jobs.store(0, Ordering::Relaxed); + } + + // Cancel stream processing after empty cycles + if cycles_without_jobs.load(Ordering::Relaxed) >= limit { + let _ = feed_sender.send(FeedMessage::Stop()).await; + } + } + } + Err(err) => { + let _ = feed_sender.send(FeedMessage::Stop()).await; + error!("Failed to fetch job: {}", err); + } + } + } + }) + .await; + + Ok(()) +} diff --git a/rust/crates/scheduler/src/pipeline/layer_permit.rs b/rust/crates/scheduler/src/pipeline/layer_permit.rs new file mode 100644 index 000000000..dbe66822d --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/layer_permit.rs @@ -0,0 +1,214 @@ +use actix::{Actor, AsyncContext, Handler, Message, WrapFuture}; +use scc::HashMap; +use std::{ + sync::Arc, + time::{Duration, SystemTime}, +}; +use tokio::sync::OnceCell; +use tracing::{debug, info}; +use uuid::Uuid; + +use miette::Result; + +/// Actor message to request a permit for a layer. +/// +/// Requests a permit to process a specific layer. 
If the layer is already +/// being processed by another task (permit hasn't expired), returns false. +/// Otherwise, grants the permit and returns true. +/// +/// # Fields +/// +/// * `id` - Unique identifier for the layer +/// * `duration` - How long the permit should be valid +/// +/// # Returns +/// +/// * `bool` - true if permit was granted, false if layer is already locked +#[derive(Message)] +#[rtype(result = "bool")] +pub struct Request { + pub id: Uuid, + pub duration: Duration, +} + +/// Actor message to release a permit for a layer. +/// +/// # Fields +/// +/// * `id` - Unique identifier for the layer +/// +/// # Returns +/// +/// * `bool` - true if permit was released, false if there wasn't a valid permit +#[derive(Message)] +#[rtype(result = "bool")] +pub struct Release { + pub id: Uuid, +} + +/// Internal representation of a layer permit. +/// +/// Tracks when a permit was issued and how long it's valid for. +struct LayerPermit { + granted_at: SystemTime, + duration: Duration, +} + +impl LayerPermit { + /// Creates a new permit with the specified duration. + fn new(duration: Duration) -> Self { + LayerPermit { + granted_at: SystemTime::now(), + duration, + } + } + + /// Checks if the permit has expired. + fn expired(&self) -> bool { + self.granted_at.elapsed().unwrap_or_default() > self.duration + } +} + +/// Service for managing layer processing permits using the Actor model. +/// +/// Prevents multiple tasks from processing the same layer concurrently by +/// issuing time-limited permits. Each layer ID can only have one active +/// permit at a time. 
+#[derive(Clone)] +pub struct LayerPermitService { + permits: Arc>, +} + +impl Actor for LayerPermitService { + type Context = actix::Context; + + fn started(&mut self, ctx: &mut Self::Context) { + let service = self.clone(); + + // Run cleanup every 5 minutes + ctx.run_interval(Duration::from_secs(5 * 60), move |_act, ctx| { + let service = service.clone(); + let actor_clone = service.clone(); + ctx.spawn( + async move { service.cleanup_expired_permits().await }.into_actor(&actor_clone), + ); + }); + + info!("LayerPermitService actor started"); + } + + fn stopped(&mut self, _ctx: &mut Self::Context) { + info!("LayerPermitService actor stopped"); + } +} + +impl Handler for LayerPermitService { + type Result = bool; + + fn handle(&mut self, msg: Request, _ctx: &mut Self::Context) -> Self::Result { + let Request { id, duration } = msg; + + // Check if there's an existing permit + let existing = self.permits.read_sync(&id, |_, permit| { + if permit.expired() { + // Permit exists but has expired + None + } else { + // Permit exists and is still valid + Some(()) + } + }); + + match existing { + Some(Some(())) => { + // Valid permit already exists - deny request + debug!("Layer {} already has an active permit", id); + false + } + _ => { + // No valid permit exists - grant new permit + let new_permit = LayerPermit::new(duration); + let _ = self.permits.insert_sync(id, new_permit); + debug!("Granted permit for layer {} (duration: {:?})", id, duration); + true + } + } + } +} + +impl Handler for LayerPermitService { + type Result = bool; + + fn handle(&mut self, msg: Release, _ctx: &mut Self::Context) -> Self::Result { + let Release { id } = msg; + + // Check if there's an existing permit + let existing = self.permits.remove_sync(&id); + + match existing { + Some((_, permit)) if !permit.expired() => { + // Valid permit removed + true + } + _ => { + // No valid permit found + false + } + } + } +} + +impl LayerPermitService { + /// Creates a new LayerPermitService with an 
empty permit map. + pub fn new() -> Self { + LayerPermitService { + permits: Arc::new(HashMap::new()), + } + } + + /// Removes expired permits from the map. + /// + /// Runs periodically to prevent unbounded growth of the permit map. + async fn cleanup_expired_permits(&self) { + let mut expired_keys = Vec::new(); + + // Collect expired permit IDs + self.permits.iter_sync(|id, permit| { + if permit.expired() { + expired_keys.push(*id); + } + true + }); + + // Remove expired permits + for id in &expired_keys { + let _ = self.permits.remove_sync(id); + } + + if !expired_keys.is_empty() { + debug!("Cleaned up {} expired layer permits", expired_keys.len()); + } + } +} + +static LAYER_PERMIT_SERVICE: OnceCell> = OnceCell::const_new(); + +/// Gets or initializes the singleton layer permit service actor. +/// +/// Returns a shared reference to the LayerPermitService actor, creating it +/// if it doesn't exist. The service manages layer processing permits to +/// prevent concurrent processing of the same layer. 
+/// +/// # Returns +/// +/// * `Ok(Addr)` - Actor address for sending messages +/// * `Err(miette::Error)` - Failed to initialize the service +pub async fn layer_permit_service() -> Result> { + LAYER_PERMIT_SERVICE + .get_or_try_init(|| async { + let service = LayerPermitService::new().start(); + Ok(service) + }) + .await + .cloned() +} diff --git a/rust/crates/scheduler/src/pipeline/matcher.rs b/rust/crates/scheduler/src/pipeline/matcher.rs new file mode 100644 index 000000000..b1fc03c23 --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/matcher.rs @@ -0,0 +1,480 @@ +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; + +use uuid::Uuid; + +use crate::{ + allocation::{allocation_service, AllocationService}, + cluster::Cluster, + cluster_key::Tag, + config::CONFIG, + dao::LayerDao, + host_cache::{host_cache_service, messages::*, HostCacheService}, + metrics, + models::{CoreSize, DispatchJob, DispatchLayer, Host}, + pipeline::{ + dispatcher::{ + error::DispatchError, + messages::{DispatchLayerMessage, DispatchResult}, + rqd_dispatcher_service, RqdDispatcherService, + }, + layer_permit::{layer_permit_service, LayerPermitService, Release, Request}, + }, +}; +use actix::Addr; +use miette::{Context, Result}; +use tokio::sync::Semaphore; +use tracing::{debug, error, info, trace}; + +pub static HOSTS_ATTEMPTED: AtomicUsize = AtomicUsize::new(0); +pub static WASTED_ATTEMPTS: AtomicUsize = AtomicUsize::new(0); + +/// Event handler for booking jobs to available hosts. 
+/// +/// This handler orchestrates the job dispatch process by: +/// - Processing incoming dispatch jobs +/// - Finding eligible layers within each job +/// - Matching layers to available host candidates +/// - Dispatching frames to selected hosts via the RQD dispatcher +pub struct MatchingService { + host_service: Addr, + layer_permit_service: Addr, + layer_dao: LayerDao, + dispatcher_service: Addr, + concurrency_semaphore: Arc, + allocation_service: Arc, +} + +impl MatchingService { + /// Creates a new MatchingService with configured DAOs and dispatcher. + /// + /// Initializes the service with: + /// - Host cache service for finding available hosts + /// - Layer DAO for querying job layers + /// - RQD dispatcher service for frame execution + /// - Concurrency semaphore to limit database transaction pressure + /// + /// # Returns + /// + /// * `Ok(MatchingService)` - Configured matching service + /// * `Err(miette::Error)` - Failed to initialize dependencies + pub async fn new() -> Result { + let layer_dao = LayerDao::new().await?; + let host_service = host_cache_service().await?; + let layer_permit_service = layer_permit_service().await?; + + // Limiting the concurrency here is necessary to avoid consuming the entire + // database connection pool + let max_concurrent_transactions = (CONFIG.database.pool_size as usize).saturating_sub(1); + + let dispatcher_service = rqd_dispatcher_service().await?; + let allocation_service = allocation_service() + .await + .wrap_err("Failed to initialize AllocationService for MatchingService")?; + + Ok(MatchingService { + host_service, + layer_permit_service, + layer_dao, + dispatcher_service, + concurrency_semaphore: Arc::new(Semaphore::new(max_concurrent_transactions)), + allocation_service, + }) + } + + /// Processes a dispatch job by finding and dispatching its eligible layers. 
+ /// + /// For each layer in the job: + /// - Queries eligible layers from the database + /// - Attempts to find suitable host candidates + /// - Dispatches frames to available hosts, layer by layer + /// + /// # Arguments + /// + /// * `job` - The dispatch job containing layers to process + pub async fn process(&self, job: DispatchJob) { + let job_disp = format!("{}", job); + let cluster = Arc::new(job.source_cluster); + + let layers = self + .layer_dao + .query_layers( + job.id, + cluster.tags().map(|tag| &tag.name).cloned().collect(), + ) + .await; + + match layers { + Ok(layers) => { + let processed_layers = AtomicUsize::new(0); + + // Stream eligible layers from this job and dispatch one by one + for layer in layers { + let layer_disp = format!("{}", layer); + // Limiting the concurrency here is necessary to avoid consuming the entire + // database connection pool + let _permit = self + .concurrency_semaphore + .acquire() + .await + .expect("Semaphore shouldn't be closed"); + + let cluster = cluster.clone(); + + // Holding a permit for a layer is intended to eliminate a race condition + // between concurrent cluster_rounds attempting to process the same layer. + // The race condition is mitigated, but not completely avoided, as the permit + // is acquired after the layers and frames have been queried. Acquiring the + // permit before querying would require breaking 'query_layers' into separate + // queries, one per layer, which greatly impacts performance. 
The rare cases + // that race each other are controlled by the frame.int_version lock on + // frame_dao.lock_for_update + let layer_permit = self + .layer_permit_service + .send(Request { + id: layer.id, + duration: Duration::from_secs(2 * layer.frames.len() as u64), + }) + .await + .expect("Layer permit service is not available"); + + if layer_permit { + let layer_id = layer.id; + self.process_layer(layer, cluster).await; + debug!("{}: Processed layer", layer_disp); + + self.layer_permit_service + .send(Release { id: layer_id }) + .await + .expect("Layer permit service is not available"); + + processed_layers.fetch_add(1, Ordering::Relaxed); + } else { + debug!( + "Layer skipped. {} already being processed by another task.", + layer + ); + } + } + + if processed_layers.load(Ordering::Relaxed) == 0 { + WASTED_ATTEMPTS.fetch_add(1, Ordering::Relaxed); + debug!("Job {} didn't process any layer", job_disp); + } + } + Err(err) => { + error!("Failed to query layers. {:?}", err); + } + } + } + + /// Validates whether a host is suitable for a specific layer. + /// + /// Subscriptions: Check whether this hosts' subscription can book at least one frame + /// + /// # Arguments + /// + /// * `_host` - The host to validate + /// * `_layer_id` - The layer ID to validate against + /// + /// # Returns + /// + /// * `bool` - True if the match is valid + fn validate_match( + host: &Host, + _layer_id: &Uuid, + show_id: &Uuid, + cores_requested: CoreSize, + allocation_service: &AllocationService, + os: Option<&str>, + ) -> bool { + // Check OS compatibility + if host.str_os.as_deref() != os { + return false; + } + + if let Some(subscription) = allocation_service.get_subscription(&host.alloc_name, show_id) { + if !subscription.bookable(&cores_requested) { + return false; + } + } else { + return false; + }; + + true + } + + /// Filters cluster tags to include only those that are also present in the dispatch layer tags. 
+ /// + /// # Arguments + /// + /// * `cluster` - The cluster containing available tags + /// * `dispatch_layer` - The layer with tag requirements + /// + /// # Returns + /// + /// * `Vec` - Tags that exist in both the cluster and the dispatch layer + fn filter_matching_tags(cluster: &Cluster, dispatch_layer: &DispatchLayer) -> Vec { + // Extract tags from cluster and filter by layer tags + match cluster { + Cluster::ComposedKey(cluster_key) => { + if dispatch_layer.tags.contains(cluster_key.tag.name.as_str()) { + vec![cluster_key.tag.clone()] + } else { + vec![] + } + } + Cluster::TagsKey(_facility_id, cluster_tags) => cluster_tags + .iter() + .filter(|tag| dispatch_layer.tags.contains(tag.name.as_str())) + .cloned() + .collect(), + } + } + + /// Processes a single layer by finding host candidates and attempting dispatch. + /// + /// The process: + /// 1. Checks out host candidates from the host cache + /// 2. Attempts dispatch on each candidate until successful or attempts exhausted + /// 3. Handles various dispatch errors (resource exhaustion, allocation limits, etc.) + /// 4. 
Returns hosts back to the cache after use + /// + /// # Arguments + /// + /// * `dispatch_layer` - The layer to dispatch to a host + /// * `cluster` - The cluster context for this dispatch operation + async fn process_layer(&self, dispatch_layer: DispatchLayer, cluster: Arc) { + let mut try_again = true; + let mut attempts = CONFIG.queue.host_candidate_attemps_per_layer; + let initial_attempts = attempts; + + // Use Option to handle ownership transfer cleanly + let mut current_layer_version = Some(dispatch_layer); + + while try_again && attempts > 0 { + attempts -= 1; + HOSTS_ATTEMPTED.fetch_add(1, Ordering::Relaxed); + + // Take ownership of the layer for this iteration + let layer = current_layer_version + .take() + .expect("Layer should be available"); + + // Filter layer tags to match the scope of the cluster in context + let tags = Self::filter_matching_tags(&cluster, &layer); + assert!( + !tags.is_empty(), + "Layer shouldn't be here if it doesn't contain at least one matching tag" + ); + trace!( + "{}: Getting a host candidate for {}, {}", + layer, + layer.facility_id, + layer.show_id + ); + + // Clone only the minimal data needed for the validation closure + // These are needed because the closure must have 'static lifetime for actor messaging + let layer_id = layer.id; + let show_id = layer.show_id; + let cores_requested = layer.cores_min; + let allocation_service = self.allocation_service.clone(); + let os = layer.str_os.clone(); + + let host_candidate = self + .host_service + .send(CheckOut { + facility_id: layer.facility_id, + show_id: layer.show_id, + tags, + cores: cores_requested, + memory: layer.mem_min, + validation: move |host| { + Self::validate_match( + host, + &layer_id, + &show_id, + cores_requested, + &allocation_service, + os.as_deref(), + ) + }, + }) + .await + .expect("Host Cache actor is unresponsive"); + + match host_candidate { + Ok(CheckedOutHost(cluster_key, host)) => { + let host_before_dispatch = host.clone(); + // Store layer info 
for error logging before moving ownership + let layer_display = format!("{}", layer); + let layer_job_id = layer.job_id; + + match self + .dispatcher_service + .send(DispatchLayerMessage { + layer, // Move ownership here + host, + }) + .await + .expect("Dispatcher actor is unresponsive") + { + Ok(DispatchResult { + updated_host, + updated_layer, + }) => { + self.host_service + .send(CheckIn(cluster_key, CheckInPayload::Host(updated_host))) + .await + .expect("Host Cache actor is unresponsive"); + + if updated_layer.frames.is_empty() { + // Stop on the first successful attempt + debug!("Layer {} fully consumed.", updated_layer,); + // Track how many candidates were needed to fully consume this layer + let candidates_used = initial_attempts - attempts + 1; + metrics::observe_candidates_per_layer(candidates_used); + try_again = false; + } else { + debug!( + "Layer {} not fully consumed. {} frames left", + updated_layer, + updated_layer.frames.len() + ); + try_again = true; + // Put the updated layer back for the next iteration + current_layer_version = Some(updated_layer); + } + } + Err(err) => { + // On error, we lost the layer since it was moved to DispatchLayerMessage + // This means we can't continue with this layer + Self::log_dispatch_error_with_info( + err, + &layer_display, + &layer_job_id, + &host_before_dispatch, + ); + self.host_service + .send(CheckIn( + cluster_key, + CheckInPayload::Invalidate(host_before_dispatch.id), + )) + .await + .expect("Host Cache actor is unresponsive"); + try_again = false; // Can't continue without the layer + } + }; + } + Err(err) => { + // Put the layer back since we didn't use it + current_layer_version = Some(layer); + + match err { + crate::host_cache::HostCacheError::NoCandidateAvailable => { + debug!( + "No host candidate available for layer {}. 
{:?}", + current_layer_version.as_ref().unwrap(), + err + ); + metrics::increment_no_candidate_iterations(); + try_again = false; + } + crate::host_cache::HostCacheError::FailedToQueryHostCache(err) => { + panic!("Cache is no longer able to access the database. {}", err) + } + } + } + } + } + } + + /// Handles various dispatch errors with appropriate logging and actions. + /// + /// Uses pre-computed layer info since the layer ownership may have been moved. + /// Different error types result in different log levels and recovery strategies. + /// + /// # Arguments + /// + /// * `error` - The dispatch error that occurred + /// * `layer_display` - Pre-computed display string for the layer + /// * `layer_job_id` - The job ID from the layer + /// * `host` - The host that the dispatch was attempted on + fn log_dispatch_error_with_info( + error: DispatchError, + layer_display: &str, + layer_job_id: &Uuid, + host: &Host, + ) { + match error { + DispatchError::HostLock(host_name) => { + info!("Failed to acquire lock for host {}", host_name) + } + DispatchError::Failure(report) => { + error!( + "{:?}", + report.wrap_err(format!("Failed to dispatch {} on {}.", layer_display, host)) + ); + } + DispatchError::AllocationOverBurst(allocation_name) => { + let msg = format!( + "Skiping host in this selection for {}. Allocation {} is over burst.", + layer_job_id, allocation_name + ); + info!(msg); + } + DispatchError::FailedToStartOnDb(sqlx_error) => { + error!( + "Failed to Start frame on Database when dispatching {} on {}. {:?}", + layer_display, host, sqlx_error + ); + } + DispatchError::DbFailure(error) => { + error!( + "Failed to Start due to database error when dispatching {} on {}. 
{:?}", + layer_display, host, error + ); + } + DispatchError::FailureGrpcConnection(_, report) => { + error!( + "{:?}", + report.wrap_err(format!( + "{} failed to create a GRPC connection to {}.", + layer_display, host + )) + ); + } + DispatchError::GrpcFailure(status) => { + error!( + "{} failed to create execute grpc command on {}. {:?}", + layer_display, host, status + ); + } + DispatchError::FailedToCreateProc { + error, + frame_id, + host_id, + } => { + error!( + "Failed to create proc for frame {} on host {}. {:?}", + frame_id, host_id, error + ); + } + DispatchError::FailedToUpdateResources(report) => { + error!( + "{:?}", + report.wrap_err(format!( + "Failed to update resources for dispatching {} on {}.", + layer_display, host + )) + ); + } + } + } +} diff --git a/rust/crates/scheduler/src/pipeline/mod.rs b/rust/crates/scheduler/src/pipeline/mod.rs new file mode 100644 index 000000000..44a9ce5b5 --- /dev/null +++ b/rust/crates/scheduler/src/pipeline/mod.rs @@ -0,0 +1,13 @@ +mod dispatcher; +pub mod entrypoint; +mod layer_permit; +mod matcher; + +pub use entrypoint::run; +pub use matcher::MatchingService; + +#[allow(unused_imports)] +pub use matcher::HOSTS_ATTEMPTED; + +#[allow(unused_imports)] +pub use matcher::WASTED_ATTEMPTS; diff --git a/rust/crates/scheduler/test_schema.sql b/rust/crates/scheduler/test_schema.sql new file mode 100644 index 000000000..e69de29bb diff --git a/rust/crates/scheduler/tests/smoke_tests.rs b/rust/crates/scheduler/tests/smoke_tests.rs new file mode 100644 index 000000000..eb842a371 --- /dev/null +++ b/rust/crates/scheduler/tests/smoke_tests.rs @@ -0,0 +1,1100 @@ +mod util; + +/// Smoke tests to exercice some scenarios +/// +/// These tests start from the main service entry points and test the complete flow: +/// 1. Service discovers clusters from the database +/// 2. Service queries jobs from each cluster +/// 3. Service processes job layers and finds host candidates +/// 4. 
Service dispatches frames to matched hosts in dry run mode +/// 5. Service updates database state appropriately +/// +/// # Database Setup +/// +/// Tests assume a local PostgreSQL database is running with: +/// - Host: localhost +/// - Port: 5432 +/// - Database: cuebot_test +/// - Username: cuebot_test +/// - Password: password +/// +/// # Running Integration Tests +/// +/// These tests are gated behind the `integration-tests` feature flag and are not +/// run by default with `cargo test`. To run them: +/// +/// ```bash +/// # Run all tests including integration tests +/// cargo test --features integration-tests +/// +/// # Run only integration tests +/// cargo test --features integration-tests integration_tests_full +/// ``` +#[cfg(all(test, feature = "smoke-tests"))] +mod scheduler_smoke_test { + use std::time::Duration; + + use scheduler::{ + cluster::{Cluster, ClusterFeed}, + cluster_key::{ClusterKey, Tag, TagType}, + pipeline, + }; + use tracing::info; + use tracing_test::traced_test; + use uuid::Uuid; + + use crate::util::WaitingFrameClause; + use std::sync::Arc; + + use scheduler::{config::OVERRIDE_CONFIG, pgpool::connection_pool}; + use serial_test::serial; + use sqlx::{Pool, Postgres, Transaction}; + use tokio::time::sleep; + use tokio_test::assert_ok; + + use crate::util::{create_test_config, get_waiting_frames_count, test_connection_pool}; + + use super::*; + + async fn setup_test_database() -> Result>, sqlx::Error> { + let pool = test_connection_pool() + .await + .map_err(|e| sqlx::Error::Configuration(e.to_string().into()))?; + + // Force a thorough cleanup before starting + cleanup_test_data(&pool).await?; + + Ok(pool) + } + + async fn cleanup_test_data(pool: &Pool) -> Result<(), sqlx::Error> { + // Use a single transaction for all cleanup operations to ensure atomicity + let mut tx = pool.begin().await?; + + // Temporarily disable triggers to avoid issues during cleanup + sqlx::query("SET session_replication_role = 'replica'") + .execute(&mut *tx) 
+ .await?; + + // Delete in reverse dependency order - most dependent tables first + // Use consistent naming patterns and ignore errors for non-existent data + + // Delete job_history records for test jobs first + let _ = sqlx::query("DELETE FROM job_history WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + let _ = sqlx::query("DELETE FROM frame WHERE str_name LIKE '%integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete layer stats and resources + let _ = sqlx::query("DELETE FROM layer_stat WHERE pk_layer IN (SELECT pk_layer FROM layer WHERE str_name LIKE 'integ_test_%')") + .execute(&mut *tx) + .await; + let _ = sqlx::query("DELETE FROM layer_resource WHERE pk_layer IN (SELECT pk_layer FROM layer WHERE str_name LIKE 'integ_test_%')") + .execute(&mut *tx) + .await; + + // Delete layers + let _ = sqlx::query("DELETE FROM layer WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete job stats and resources + let _ = sqlx::query("DELETE FROM job_stat WHERE pk_job IN (SELECT pk_job FROM job WHERE str_name LIKE 'integ_test_%')") + .execute(&mut *tx) + .await; + let _ = sqlx::query("DELETE FROM job_resource WHERE pk_job IN (SELECT pk_job FROM job WHERE str_name LIKE 'integ_test_%')") + .execute(&mut *tx) + .await; + + // Delete jobs + let _ = sqlx::query("DELETE FROM job WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete folder + let _ = sqlx::query("DELETE FROM folder WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete host stats and tags + let _ = sqlx::query("DELETE FROM host_stat WHERE pk_host IN (SELECT pk_host FROM host WHERE str_name LIKE 'integ_test_%')") + .execute(&mut *tx) + .await; + let _ = sqlx::query("DELETE FROM host_tag WHERE str_tag LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete hosts + let _ = sqlx::query("DELETE FROM host WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete subscriptions 
+ let _ = sqlx::query("DELETE FROM subscription WHERE pk_alloc IN (SELECT pk_alloc FROM alloc WHERE str_name LIKE 'integ_test_%')") + .execute(&mut *tx) + .await; + + // Delete allocs + let _ = sqlx::query("DELETE FROM alloc WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete shows + let _ = sqlx::query("DELETE FROM show WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Delete facilities and departments + let _ = sqlx::query("DELETE FROM facility WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + let _ = sqlx::query("DELETE FROM dept WHERE str_name LIKE 'integ_test_%'") + .execute(&mut *tx) + .await; + + // Clean up any orphaned records that might cause constraint violations + let _ = sqlx::query("DELETE FROM job_stat WHERE pk_job NOT IN (SELECT pk_job FROM job)") + .execute(&mut *tx) + .await; + let _ = sqlx::query( + "DELETE FROM layer_stat WHERE pk_layer NOT IN (SELECT pk_layer FROM layer)", + ) + .execute(&mut *tx) + .await; + let _ = sqlx::query( + "DELETE FROM layer_resource WHERE pk_layer NOT IN (SELECT pk_layer FROM layer)", + ) + .execute(&mut *tx) + .await; + let _ = + sqlx::query("DELETE FROM job_resource WHERE pk_job NOT IN (SELECT pk_job FROM job)") + .execute(&mut *tx) + .await; + + // Re-enable triggers + sqlx::query("SET session_replication_role = 'origin'") + .execute(&mut *tx) + .await?; + + // Commit the transaction + tx.commit().await?; + + Ok(()) + } + + /// Creates comprehensive test data for integration testing with multiple scenarios + async fn create_test_data(pool: &Pool) -> Result { + // Create unique suffix for this test run to avoid conflicts when running tests concurrently + let test_suffix = Uuid::new_v4().to_string()[..8].to_string(); + // Create basic entities + let facility_id = Uuid::new_v4(); + let dept_id = Uuid::new_v4(); + let show_id = Uuid::new_v4(); + + let mut tx = pool.begin().await?; + + // Create facility + sqlx::query("INSERT INTO facility 
(pk_facility, str_name) VALUES ($1, $2)") + .bind(facility_id.to_string()) + .bind(format!("integ_test_facility_{}", test_suffix)) + .execute(&mut *tx) + .await?; + + // Create department + sqlx::query("INSERT INTO dept (pk_dept, str_name) VALUES ($1, $2)") + .bind(dept_id.to_string()) + .bind(format!("integ_test_dept_{}", test_suffix)) + .execute(&mut *tx) + .await?; + + // Create show + sqlx::query("INSERT INTO show (pk_show, str_name) VALUES ($1, $2)") + .bind(show_id.to_string()) + .bind(format!("integ_test_show_{}", test_suffix)) + .execute(&mut *tx) + .await?; + + // Create allocations for different tag types + let hostname_alloc = create_allocation( + &mut tx, + facility_id, + &format!("integ_test_hostname_alloc_{}", test_suffix), + "HOSTNAME", + ) + .await?; + let alloc_alloc = create_allocation( + &mut tx, + facility_id, + &format!("integ_test_alloc_alloc_{}", test_suffix), + "ALLOC", + ) + .await?; + let manual_alloc = create_allocation( + &mut tx, + facility_id, + &format!("integ_test_manual_alloc_{}", test_suffix), + "MANUAL", + ) + .await?; + + // Create subscriptions with different resource limits + create_subscription(&mut tx, hostname_alloc, show_id, 1000, 1200).await?; + create_subscription(&mut tx, alloc_alloc, show_id, 800, 1000).await?; + create_subscription(&mut tx, manual_alloc, show_id, 600, 800).await?; + + // Create hosts with different tag types and resource configurations + let hostname_host = create_host( + &mut tx, + hostname_alloc, + &format!("integ_test_hostname_host_{}", test_suffix), + 16, + 32 * 1024 * 1024, + 4, + 8 * 1024 * 1024, + vec![( + &format!("integ_test_hostname_tag_{}", test_suffix), + "HOSTNAME", + )], + ) + .await?; + + let alloc_host = create_host( + &mut tx, + alloc_alloc, + &format!("integ_test_alloc_host_{}", test_suffix), + 12, + 16 * 1024 * 1024, + 2, + 4 * 1024 * 1024, + vec![(&format!("integ_test_alloc_tag_{}", test_suffix), "ALLOC")], + ) + .await?; + + let manual_host = create_host( + &mut tx, + manual_alloc, 
+ &format!("integ_test_manual_host_{}", test_suffix), + 8, + 8 * 1024 * 1024, + 1, + 2 * 1024 * 1024, + vec![(&format!("integ_test_manual_tag_{}", test_suffix), "MANUAL")], + ) + .await?; + + // Create folder + let folder_id = create_folder( + &mut tx, + show_id, + dept_id, + &format!("integ_test_folder_{}", test_suffix), + ) + .await?; + + tx.commit().await?; + + // Create comprehensive job scenarios + let hostname_job = create_job_scenario( + pool, + show_id, + facility_id, + dept_id, + folder_id, + &format!("integ_test_hostname_job_{}", test_suffix), + vec![ + ( + &format!("integ_test_hostname_layer1_{}", test_suffix), + &format!("integ_test_hostname_tag_{}", test_suffix), + 2, + 2 * 1024 * 1024, + 1, + 1024 * 1024, + ), + ( + &format!("integ_test_hostname_layer2_{}", test_suffix), + &format!("integ_test_different_tag{}", test_suffix), + 4, + 4 * 1024 * 1024, + 0, + 0, + ), + ], + 3, + ) + .await?; + + let alloc_job = create_job_scenario( + pool, + show_id, + facility_id, + dept_id, + folder_id, + &format!("integ_test_alloc_job_{}", test_suffix), + vec![( + &format!("integ_test_alloc_layer_{}", test_suffix), + &format!("integ_test_alloc_tag_{}", test_suffix), + 1, + 1024 * 1024, + 1, + 512 * 1024, + )], + 3, + ) + .await?; + + let manual_job = create_job_scenario( + pool, + show_id, + facility_id, + dept_id, + folder_id, + &format!("integ_test_manual_job_{}", test_suffix), + vec![( + &format!("integ_test_manual_layer_{}", test_suffix), + &format!("integ_test_manual_tag_{}", test_suffix), + 1, + 1024 * 1024, + 0, + 0, + )], + 3, + ) + .await?; + + // Create a mixed-tag job that requires multiple host types + let mixed_job = create_job_scenario( + pool, + show_id, + facility_id, + dept_id, + folder_id, + &format!("integ_test_mixed_job_{}", test_suffix), + vec![ + ( + &format!("integ_test_mixed_hostname_{}", test_suffix), + &format!("integ_test_hostname_tag_{}", test_suffix), + 1, + 1024 * 1024, + 0, + 0, + ), + ( + &format!("integ_test_mixed_alloc_{}", 
test_suffix), + &format!("integ_test_alloc_tag_{}", test_suffix), + 1, + 1024 * 1024, + 0, + 0, + ), + ( + &format!("integ_test_mixed_manual_{}", test_suffix), + &format!("integ_test_manual_tag_{}", test_suffix), + 1, + 1024 * 1024, + 0, + 0, + ), + ], + 3, + ) + .await?; + + Ok(TestData { + facility_id, + show_id, + dept_id, + hostname_alloc, + alloc_alloc, + manual_alloc, + hostname_host, + alloc_host, + manual_host, + hostname_job, + alloc_job, + manual_job, + mixed_job, + test_suffix, + }) + } + + async fn create_allocation( + pool: &mut Transaction<'static, Postgres>, + facility_id: Uuid, + name: &str, + tag: &str, + ) -> Result { + let alloc_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO alloc (pk_alloc, str_name, pk_facility, str_tag) VALUES ($1, $2, $3, $4)", + ) + .bind(alloc_id.to_string()) + .bind(name) + .bind(facility_id.to_string()) + .bind(tag) + .execute(&mut **pool) + .await?; + Ok(alloc_id) + } + + async fn create_subscription( + pool: &mut Transaction<'static, Postgres>, + alloc_id: Uuid, + show_id: Uuid, + size: i64, + burst: i64, + ) -> Result<(), sqlx::Error> { + let subscription_id = Uuid::new_v4(); + sqlx::query("INSERT INTO subscription (pk_subscription, pk_alloc, pk_show, int_size, int_burst) VALUES ($1, $2, $3, $4, $5)") + .bind(subscription_id.to_string()) + .bind(alloc_id.to_string()) + .bind(show_id.to_string()) + .bind(size) + .bind(burst) + .execute(&mut **pool) + .await?; + Ok(()) + } + + #[allow(clippy::too_many_arguments)] + async fn create_host( + pool: &mut Transaction<'static, Postgres>, + alloc_id: Uuid, + name: &str, + cores: i64, + memory_kb: i64, + gpus: i64, + gpu_memory_kb: i64, + tags: Vec<(&str, &str)>, + ) -> Result { + let host_id = Uuid::new_v4(); + + // Create host + sqlx::query( + "INSERT INTO host (pk_host, pk_alloc, str_name, str_lock_state, int_cores, int_cores_idle, int_mem, int_mem_idle, int_gpus, int_gpus_idle, int_gpu_mem, int_gpu_mem_idle, int_thread_mode) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 
$10, $11, $12, $13)" + ) + .bind(host_id.to_string()) + .bind(alloc_id.to_string()) + .bind(name) + .bind("OPEN") + .bind(cores * 100) // Core multiplier + .bind(cores * 100) + .bind(memory_kb) + .bind(memory_kb) + .bind(gpus) + .bind(gpus) + .bind(gpu_memory_kb) + .bind(gpu_memory_kb) + .bind(0) // ThreadMode::Auto + .execute(&mut **pool) + .await?; + + // Create host_stat + let host_stat_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO host_stat (pk_host_stat, pk_host, str_state, str_os, int_gpu_mem_total, int_gpu_mem_free) VALUES ($1, $2, $3, $4, $5, $6)" + ) + .bind(host_stat_id.to_string()) + .bind(host_id.to_string()) + .bind("UP") + .bind("linux") + .bind(gpu_memory_kb) + .bind(gpu_memory_kb) + .execute(&mut **pool) + .await?; + + // Create host tags + for (tag_name, tag_type) in tags { + let tag_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO host_tag (pk_host_tag, pk_host, str_tag, str_tag_type) VALUES ($1, $2, $3, $4)" + ) + .bind(tag_id.to_string()) + .bind(host_id.to_string()) + .bind(tag_name) + .bind(tag_type) + .execute(&mut **pool) + .await?; + } + + Ok(TestHost { + id: host_id, + name: name.to_string(), + alloc_id, + }) + } + + async fn create_folder( + pool: &mut Transaction<'static, Postgres>, + show_id: Uuid, + dept_id: Uuid, + name: &str, + ) -> Result { + let folder_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO folder (pk_folder, pk_show, pk_dept, str_name) VALUES ($1, $2, $3, $4)", + ) + .bind(folder_id.to_string()) + .bind(show_id.to_string()) + .bind(dept_id.to_string()) + .bind(name) + .execute(&mut **pool) + .await?; + Ok(folder_id) + } + + #[allow(clippy::too_many_arguments)] + async fn create_job_scenario( + pool: &Pool, + show_id: Uuid, + facility_id: Uuid, + dept_id: Uuid, + folder_id: Uuid, + job_name: &str, + layers: Vec<(&str, &str, i64, i64, i64, i64)>, // (layer_name, tag, min_cores, min_mem, min_gpus, min_gpu_mem) + frames_by_layer: usize, + ) -> Result { + let mut tx = pool.begin().await?; + let job_id = 
Uuid::new_v4(); + + // Create job + sqlx::query( + "INSERT INTO job (pk_job, pk_folder, pk_show, pk_facility, pk_dept, str_name, str_visible_name, str_shot, str_user, str_state) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)" + ) + .bind(job_id.to_string()) + .bind(folder_id.to_string()) + .bind(show_id.to_string()) + .bind(facility_id.to_string()) + .bind(dept_id.to_string()) + .bind(job_name) + .bind(job_name) + .bind(format!("integ_test_shot_{}", job_name.split('_').next_back().unwrap_or("default"))) + .bind(format!("integ_test_user_{}", job_name.split('_').next_back().unwrap_or("default"))) + .bind("PENDING") + .execute(&mut *tx) + .await?; + + // Create job stats with waiting frames + // First check if job_stat already exists for this job (might be created by triggers) + let existing_job_stat = + sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM job_stat WHERE pk_job = $1") + .bind(job_id.to_string()) + .fetch_one(&mut *tx) + .await?; + + if existing_job_stat == 0 { + let total_waiting_frames = layers.len() * 3; // 3 frames per layer + sqlx::query( + "INSERT INTO job_stat (pk_job_stat, pk_job, int_waiting_count) VALUES ($1, $2, $3)", + ) + .bind(Uuid::new_v4().to_string()) + .bind(job_id.to_string()) + .bind(total_waiting_frames as i64) + .execute(&mut *tx) + .await?; + } else { + // Update existing job_stat + let total_waiting_frames = layers.len() * 3; // 3 frames per layer + sqlx::query("UPDATE job_stat SET int_waiting_count = $1 WHERE pk_job = $2") + .bind(total_waiting_frames as i64) + .bind(job_id.to_string()) + .execute(&mut *tx) + .await?; + } + + // Create job resource + // First check if job_resource already exists for this job (might be created by triggers) + let existing_job_resource = + sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM job_resource WHERE pk_job = $1") + .bind(job_id.to_string()) + .fetch_one(&mut *tx) + .await?; + + if existing_job_resource == 0 { + sqlx::query( + "INSERT INTO job_resource (pk_job_resource, pk_job, 
int_priority) VALUES ($1, $2, $3)", + ) + .bind(Uuid::new_v4().to_string()) + .bind(job_id.to_string()) + .bind(1) + .execute(&mut *tx) + .await?; + } else { + // Update existing job_resource + sqlx::query("UPDATE job_resource SET int_priority = $1 WHERE pk_job = $2") + .bind(1) + .bind(job_id.to_string()) + .execute(&mut *tx) + .await?; + } + + let mut test_layers = Vec::new(); + + for (layer_name, tag, min_cores, min_mem, min_gpus, min_gpu_mem) in layers { + let layer_id = Uuid::new_v4(); + + // Create layer + sqlx::query( + "INSERT INTO layer (pk_layer, pk_job, str_name, str_cmd, str_range, str_tags, str_type, int_cores_min, int_mem_min, int_gpus_min, int_gpu_mem_min) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)" + ) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .bind(layer_name) + .bind("echo 'Integration test frame'") + .bind("1-3") + .bind(tag) + .bind("PRE") // Default layer type + .bind(min_cores * 100) // Core multiplier + .bind(min_mem) + .bind(min_gpus) + .bind(min_gpu_mem) + .execute(&mut *tx) + .await?; + + // Create layer stats + sqlx::query( + "INSERT INTO layer_stat (pk_layer_stat, pk_layer, pk_job, int_waiting_count, int_total_count) VALUES ($1, $2, $3, $4, $5) ON CONFLICT (pk_layer) DO UPDATE SET int_waiting_count = EXCLUDED.int_waiting_count, int_total_count = EXCLUDED.int_total_count" + ) + .bind(Uuid::new_v4().to_string()) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .bind(3) // 3 waiting frames + .bind(3) // 3 total frames + .execute(&mut *tx) + .await?; + + // Create layer resource + // Check if layer_resource already exists for this layer (might be created by triggers) + let existing_layer_resource = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM layer_resource WHERE pk_layer = $1", + ) + .bind(layer_id.to_string()) + .fetch_one(&mut *tx) + .await?; + + if existing_layer_resource == 0 { + sqlx::query( + "INSERT INTO layer_resource (pk_layer_resource, pk_layer, pk_job) VALUES ($1, $2, $3)" + ) + 
.bind(Uuid::new_v4().to_string()) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .execute(&mut *tx) + .await?; + } + + // Create frames (1-3) + for frame_num in 1..=frames_by_layer as i32 { + let frame_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO frame (pk_frame, pk_layer, pk_job, str_name, str_state, int_number, int_layer_order, int_dispatch_order) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)" + ) + .bind(frame_id.to_string()) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .bind(format!("{}-frame{}", frame_num, layer_name)) + .bind("WAITING") + .bind(frame_num) + .bind(frame_num) + .bind(frame_num) + .execute(&mut *tx) + .await?; + } + + test_layers.push(TestLayer { + id: layer_id, + name: layer_name.to_string(), + tag: tag.to_string(), + }); + } + tx.commit().await?; + + Ok(TestJob { + id: job_id, + name: job_name.to_string(), + layers: test_layers, + frames_by_layer, + }) + } + + #[allow(dead_code)] + struct TestData { + facility_id: Uuid, + show_id: Uuid, + dept_id: Uuid, + hostname_alloc: Uuid, + alloc_alloc: Uuid, + manual_alloc: Uuid, + hostname_host: TestHost, + alloc_host: TestHost, + manual_host: TestHost, + hostname_job: TestJob, + alloc_job: TestJob, + manual_job: TestJob, + mixed_job: TestJob, + test_suffix: String, + } + + impl TestData { + fn num_frames(&self) -> usize { + self.alloc_job.layers.len() * self.alloc_job.frames_by_layer + + self.mixed_job.layers.len() * self.mixed_job.frames_by_layer + + self.manual_job.layers.len() * self.manual_job.frames_by_layer + + self.hostname_job.layers.len() * self.hostname_job.frames_by_layer + } + } + + #[derive(Debug)] + #[allow(dead_code)] + struct TestHost { + id: Uuid, + name: String, + alloc_id: Uuid, + } + + #[derive(Debug)] + #[allow(dead_code)] + struct TestJob { + id: Uuid, + name: String, + layers: Vec, + frames_by_layer: usize, + } + + #[derive(Debug)] + #[allow(dead_code)] + struct TestLayer { + id: Uuid, + name: String, + tag: String, + } + + /// Helper function to run a 
test with proper setup and cleanup + async fn test_wrapper( + test_name: &str, + test_fn: F, + ) -> Result<(), Box> + where + F: FnOnce(TestData) -> Fut, + Fut: std::future::Future, + { + info!("Starting integration test: {}", test_name); + + // Setup database and test data + let pool = setup_test_database().await?; + sleep(Duration::from_secs(3)).await; + + // Log pool status + info!( + "Pool status - Size: {}, Idle: {}", + pool.size(), + pool.num_idle() + ); + let test_data = create_test_data(&pool).await?; + // Wait for data transactions to clear + sleep(Duration::from_secs(1)).await; + + // Set global config + let _ = OVERRIDE_CONFIG.set(create_test_config()); + + // Run the test + test_fn(test_data).await; + + Ok(()) + } + + #[tokio::test] + #[traced_test] + #[serial] + async fn test_dispatch_hostname_tag_flow() { + let result = test_wrapper( + "test_dispatch_hostname_tag_flow", + test_dispatch_hostname_tag_flow_inner, + ) + .await; + assert_ok!(result, "Failure at test wrapper") + } + + async fn test_dispatch_hostname_tag_flow_inner(test_data: TestData) { + // Create a specific cluster feed for HOSTNAME tag testing + let hostname_cluster = Cluster::ComposedKey(ClusterKey { + facility_id: test_data.facility_id.to_string(), + show_id: test_data.show_id.to_string(), + tag: Tag { + name: format!("integ_test_hostname_tag_{}", test_data.test_suffix), + ttype: TagType::HostName, + }, + }); + + let cluster_feed = ClusterFeed::load_from_clusters(vec![hostname_cluster], &[]); + + info!("Starting HOSTNAME tag integration test..."); + + let waiting_frames_before = + get_waiting_frames_count(WaitingFrameClause::JobId(test_data.hostname_job.id)).await; + assert_eq!(waiting_frames_before, 6); + // Run the job fetcher with our test cluster feed + // This simulates the main service flow: cluster discovery → job querying → layer processing → dispatching + let result = pipeline::run(cluster_feed).await; + + match result { + Ok(()) => { + info!("✅ HOSTNAME tag integration test 
completed successfully"); + + let waiting_frames = + get_waiting_frames_count(WaitingFrameClause::JobId(test_data.hostname_job.id)) + .await; + info!( + "Job waiting count after processing: {}. Half the frames matched the expected tag", + waiting_frames + ); + assert_eq!(waiting_frames, 3); + + // In dry run mode, frames shouldn't actually be dispatched (state changes) + // but the service should have processed them without errors + } + Err(e) => { + panic!("❌ HOSTNAME tag integration test failed: {}", e); + } + } + } + + #[tokio::test] + #[traced_test] + #[serial] + async fn test_dispatch_alloc_tag_flow() { + let result = test_wrapper( + "test_dispatch_hostname_tag_flow", + test_dispatch_alloc_tag_flow_inner, + ) + .await; + assert_ok!(result, "Failure at test wrapper") + } + + async fn test_dispatch_alloc_tag_flow_inner(test_data: TestData) { + // Create a specific cluster feed for ALLOC tag testing + let alloc_cluster = Cluster::ComposedKey(ClusterKey { + facility_id: test_data.facility_id.to_string(), + show_id: test_data.show_id.to_string(), + tag: Tag { + name: format!("integ_test_alloc_tag_{}", test_data.test_suffix), + ttype: TagType::Alloc, + }, + }); + + let cluster_feed = ClusterFeed::load_from_clusters(vec![alloc_cluster], &[]); + + info!("Starting ALLOC tag integration test..."); + + let frame_count = test_data.num_frames(); + let waiting_frames_before = get_waiting_frames_count(WaitingFrameClause::All).await; + assert_eq!(waiting_frames_before, frame_count); + + let result = pipeline::run(cluster_feed).await; + + match result { + Ok(()) => { + let waiting_frames_after = get_waiting_frames_count(WaitingFrameClause::All).await; + let target_frames = + test_data.alloc_job.frames_by_layer + test_data.mixed_job.frames_by_layer; + assert_eq!(waiting_frames_after, frame_count - target_frames); + info!("✅ ALLOC tag integration test completed successfully"); + } + Err(e) => { + panic!("❌ ALLOC tag integration test failed: {}", e); + } + } + } + + 
#[tokio::test]
+    #[traced_test]
+    #[serial]
+    async fn test_dispatch_manual_tag_flow() {
+        let result = test_wrapper(
+            "test_dispatch_manual_tag_flow",
+            test_dispatch_manual_tag_flow_inner,
+        )
+        .await;
+        assert_ok!(result, "Failure at test wrapper")
+    }
+
+    async fn test_dispatch_manual_tag_flow_inner(test_data: TestData) {
+        // Create a cluster feed with MANUAL tags (chunked)
+        let manual_cluster = Cluster::TagsKey(vec![Tag {
+            name: format!("integ_test_manual_tag_{}", test_data.test_suffix),
+            ttype: TagType::Manual,
+        }]);
+
+        let cluster_feed = ClusterFeed::load_from_clusters(vec![manual_cluster], &[]);
+
+        info!("Starting MANUAL tag integration test...");
+        let frame_count = test_data.num_frames();
+        let waiting_frames_before = get_waiting_frames_count(WaitingFrameClause::All).await;
+        assert_eq!(waiting_frames_before, frame_count);
+
+        let result = pipeline::run(cluster_feed).await;
+
+        match result {
+            Ok(()) => {
+                let waiting_frames_after = get_waiting_frames_count(WaitingFrameClause::All).await;
+                // Bug fix: the MANUAL cluster consumes the manual layer of both
+                // the manual job AND the mixed job (mirroring the ALLOC test).
+                // The original summed manual_job twice — numerically equal with
+                // the current fixture (3 + 3) but semantically wrong.
+                let target_frames =
+                    test_data.manual_job.frames_by_layer + test_data.mixed_job.frames_by_layer;
+                assert_eq!(waiting_frames_after, frame_count - target_frames);
+                info!("✅ MANUAL tag integration test completed successfully");
+            }
+            Err(e) => {
+                panic!("❌ MANUAL tag integration test failed: {}", e);
+            }
+        }
+    }
+
+    #[tokio::test]
+    #[traced_test]
+    #[serial]
+    async fn test_dispatch_mixed_job_scenario() {
+        let result = test_wrapper(
+            "test_dispatch_mixed_job_scenario",
+            test_dispatch_mixed_job_scenario_inner,
+        )
+        .await;
+        assert_ok!(result, "Failure at test wrapper")
+    }
+
+    async fn test_dispatch_mixed_job_scenario_inner(test_data: TestData) {
+        // Create multiple clusters to handle the mixed job with different tag types
+        let clusters = vec![
+            Cluster::ComposedKey(ClusterKey {
+                facility_id: test_data.facility_id.to_string(),
+                show_id: test_data.show_id.to_string(),
+                tag: Tag {
+                    name: format!("integ_test_hostname_tag_{}", 
test_data.test_suffix), + ttype: TagType::HostName, + }, + }), + Cluster::ComposedKey(ClusterKey { + facility_id: test_data.facility_id.to_string(), + show_id: test_data.show_id.to_string(), + tag: Tag { + name: format!("integ_test_alloc_tag_{}", test_data.test_suffix), + ttype: TagType::Alloc, + }, + }), + Cluster::TagsKey(vec![Tag { + name: format!("integ_test_manual_tag_{}", test_data.test_suffix), + ttype: TagType::Manual, + }]), + ]; + + let cluster_feed = ClusterFeed::load_from_clusters(clusters, &[]); + + info!("Starting mixed job scenario integration test..."); + + let result = pipeline::run(cluster_feed).await; + let pool = assert_ok!(connection_pool().await); + + match result { + Ok(()) => { + info!("✅ Mixed job scenario integration test completed successfully"); + + // Verify that all layers of the mixed job were processed + let mixed_job_layers = + sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM layer WHERE pk_job = $1") + .bind(test_data.mixed_job.id.to_string()) + .fetch_one(&*pool) + .await + .expect("Failed to query mixed job layers"); + + info!("Mixed job has {} layers", mixed_job_layers); + assert_eq!( + mixed_job_layers, 3, + "Mixed job should have 3 layers with different tags" + ); + } + Err(e) => { + panic!("❌ Mixed job scenario integration test failed: {}", e); + } + } + } + + #[tokio::test] + #[traced_test] + #[serial] + async fn test_dispatcher_no_matching_hosts() { + let result = test_wrapper( + "test_dispatcher_no_matching_hosts", + test_dispatcher_no_matching_hosts_inner, + ) + .await; + assert_ok!(result, "Failure at test wrapper") + } + + async fn test_dispatcher_no_matching_hosts_inner(_test_data: TestData) { + // Create a cluster with a non-existent tag that won't match any hosts + let non_matching_cluster = Cluster::TagsKey(vec![Tag { + name: "non_existent_tag".to_string(), + ttype: TagType::Manual, + }]); + + let cluster_feed = ClusterFeed::load_from_clusters(vec![non_matching_cluster], &[]); + + info!("Starting no matching hosts 
integration test..."); + + let waiting_frames_before = get_waiting_frames_count(WaitingFrameClause::All).await; + assert_eq!(waiting_frames_before, 21); + + let result = pipeline::run(cluster_feed).await; + + match result { + Ok(()) => { + let waiting_frames_after = get_waiting_frames_count(WaitingFrameClause::All).await; + assert_eq!(waiting_frames_after, 21); + + info!("✅ No matching hosts integration test completed successfully"); + // The service should handle no matching hosts gracefully + } + Err(e) => { + panic!("❌ No matching hosts integration test failed: {}", e); + } + } + } + + // #[tokio::test] + // #[traced_test] + // async fn test_full_service_cluster_discovery() { + // let pool = setup_test_database() + // .await + // .expect("Failed to setup test database"); + // let _test_data = create_full_integration_test_data(&pool) + // .await + // .expect("Failed to create test data"); + // let _ = OVERRIDE_CONFIG.set(create_test_config()); + + // info!("Starting cluster discovery integration test..."); + + // // Test the full cluster discovery process + // let cluster_feed = ClusterFeed::load_all(true) + // .await + // .expect("Failed to load clusters"); + + // info!("Discovered {} clusters", cluster_feed.keys.len()); + + // // Run the job fetcher with discovered clusters + // let result = job_fetcher::run(cluster_feed).await; + + // match result { + // Ok(()) => { + // info!("✅ Cluster discovery integration test completed successfully"); + // } + // Err(e) => { + // panic!("❌ Cluster discovery integration test failed: {}", e); + // } + // } + + // cleanup_test_data(&pool) + // .await + // .expect("Failed to clean up test data"); + // pool.close().await; + // } +} diff --git a/rust/crates/scheduler/tests/stress_tests.rs b/rust/crates/scheduler/tests/stress_tests.rs new file mode 100644 index 000000000..565e8e0c1 --- /dev/null +++ b/rust/crates/scheduler/tests/stress_tests.rs @@ -0,0 +1,123 @@ +mod util; + +#[cfg(all(test, feature = "smoke-tests"))] + +mod 
stress_test { + use crate::util::WaitingFrameClause; + use std::{sync::atomic::Ordering, time::SystemTime}; + + use scheduler::{ + cluster::{self, ClusterFeed}, + config::OVERRIDE_CONFIG, + host_cache, pipeline, + }; + use tokio_test::assert_ok; + use tracing::info; + use tracing_test::traced_test; + use uuid::Uuid; + + use super::*; + use crate::util::{ + clean_up_test_data, create_test_config, create_test_data, get_waiting_frames_count, + TestData, + }; + + struct TestDescription { + test_name: String, + job_count: usize, + host_count: usize, + layer_count: usize, + frames_per_layer_count: usize, + tag_count: usize, + } + + impl TestDescription { + pub fn total_frames(&self) -> usize { + self.job_count * self.layer_count * self.frames_per_layer_count + } + } + + async fn setup(test_description: &TestDescription) -> Result { + let test_id = Uuid::new_v4().to_string()[..8].to_string(); + + create_test_data( + &test_description.test_name, + &test_id, + test_description.job_count, + test_description.host_count, + test_description.layer_count, + test_description.frames_per_layer_count, + test_description.tag_count, + ) + .await + } + + async fn tear_down(test_prefix: &str) -> Result<(), sqlx::Error> { + clean_up_test_data(test_prefix).await + } + + #[actix::test] + // #[traced_test] + async fn test_stress_small() { + let desc = TestDescription { + test_name: "sts".to_string(), + job_count: 200, + host_count: 1000, + layer_count: 4, + frames_per_layer_count: 2, + tag_count: 4, + }; + let _ = tracing_subscriber::fmt() + .with_max_level(tracing::Level::INFO) + .with_ansi(true) + .try_init(); + + // Set global config + let _ = OVERRIDE_CONFIG.set(create_test_config()); + let test_data = assert_ok!(setup(&desc).await); + + let cluster_len = test_data.clusters.len(); + let cluster_feed = ClusterFeed::load_from_clusters(test_data.clusters, &[]); + info!( + "Starting Small stress test {} - cluster size: {:?}", + test_data.test_prefix, cluster_len + ); + + let 
waiting_frames_before = + get_waiting_frames_count(WaitingFrameClause::JobPrefix(test_data.test_prefix.clone())) + .await; + assert_eq!(waiting_frames_before, desc.total_frames()); + + let start_time = SystemTime::now(); + // Run job dispatcher + assert_ok!(pipeline::run(cluster_feed).await); + + let duration = start_time.elapsed().unwrap().as_secs(); + info!("Processed Frames: {} at {}s", desc.total_frames(), duration); + info!( + "Host attempts: {}", + pipeline::HOSTS_ATTEMPTED.load(Ordering::Relaxed) + ); + info!( + "Wasted attempts: {}%", + (pipeline::WASTED_ATTEMPTS.load(Ordering::Relaxed) as f32 + / pipeline::HOSTS_ATTEMPTED.load(Ordering::Relaxed) as f32) + * 100.0 + ); + info!( + "Cluster rounds: {}", + cluster::CLUSTER_ROUNDS.load(Ordering::Relaxed) + ); + info!("HostCache hit ratio = {}%", host_cache::hit_ratio().await); + + let waiting_frames_after = + get_waiting_frames_count(WaitingFrameClause::JobPrefix(test_data.test_prefix.clone())) + .await; + + // Clean up test data + assert_ok!(tear_down(&test_data.test_prefix).await); + + // Ensure remainder is less than 10% + assert!(waiting_frames_after < (desc.total_frames() as f64 * 0.1) as usize); + } +} diff --git a/rust/crates/scheduler/tests/util.rs b/rust/crates/scheduler/tests/util.rs new file mode 100644 index 000000000..a24223ce1 --- /dev/null +++ b/rust/crates/scheduler/tests/util.rs @@ -0,0 +1,1068 @@ +use miette::Result; +use rand::{ + rngs::StdRng, + seq::{IteratorRandom, SliceRandom}, + Rng, SeedableRng, +}; +use scheduler::{ + cluster::Cluster, + cluster_key::{ClusterKey, Tag, TagType}, + config::{ + Config, DatabaseConfig, HostBookingStrategy, HostCacheConfig, LoggingConfig, QueueConfig, + RqdConfig, SchedulerConfig, + }, +}; +use std::time::Duration; +use uuid::Uuid; + +use std::sync::Arc; + +use sqlx::{postgres::PgPoolOptions, Pool, Postgres, Transaction}; +use tokio::sync::OnceCell; + +// Database connection configuration - hardcoded for testing +const TEST_DB_HOST: &str = "localhost";
+const TEST_DB_PORT: u16 = 5432; +const TEST_DB_NAME: &str = "cuebot"; +const TEST_DB_USER: &str = "cuebot"; +const TEST_DB_PASSWORD: &str = "cuebot_password"; +#[allow(dead_code)] +const SEED: [u8; 32] = [0; 32]; // Replace with your own seed + +static TEST_CONNECTION_POOL: OnceCell>> = OnceCell::const_new(); + +pub async fn test_connection_pool() -> Result>, sqlx::Error> { + let database_url = format!( + "postgresql://{}:{}@{}:{}/{}", + TEST_DB_USER, TEST_DB_PASSWORD, TEST_DB_HOST, TEST_DB_PORT, TEST_DB_NAME + ); + TEST_CONNECTION_POOL + .get_or_try_init(|| async { + let pool = PgPoolOptions::new() + .max_connections(2) + // .idle_timeout(Some(Duration::from_secs(1))) + // .acquire_timeout(Duration::from_secs(30)) + .connect(&database_url) + .await?; + Ok(Arc::new(pool)) + }) + .await + .map(Arc::clone) +} + +#[allow(dead_code)] +pub fn create_test_config() -> Config { + // let connection_url = format!( + // "postgresql://{}:{}@{}:{}/{}", + // TEST_DB_USER, TEST_DB_PASSWORD, TEST_DB_HOST, TEST_DB_PORT, TEST_DB_NAME + // ); + + let host_cache_config = HostCacheConfig { + update_stat_on_book: true, + ..Default::default() + }; + + Config { + logging: LoggingConfig { + level: "debug".to_string(), + path: "/tmp/scheduler_test.log".to_string(), + file_appender: false, + }, + queue: QueueConfig { + monitor_interval: Duration::from_secs(1), + // Won't influence tests as it's only read by main, + //for test use #[tokio::test(flavor = "multi_thread", worker_threads = 8)] + worker_threads: 2, + dispatch_frames_per_layer_limit: 8, // Small limit for testing + core_multiplier: 100, + memory_stranded_threshold: bytesize::ByteSize::mb(100), + job_back_off_duration: Duration::from_secs(10), + stream: scheduler::config::StreamConfig { + cluster_buffer_size: 4, + job_buffer_size: 8, + }, + manual_tags_chunk_size: 10, + hostname_tags_chunk_size: 20, + host_candidate_attemps_per_layer: 5, + empty_job_cycles_before_quiting: Some(20), + mem_reserved_min: bytesize::ByteSize::mb(250), + 
allocation_refresh_interval: Duration::from_secs(3), + selfish_services: Vec::new(), + host_booking_strategy: HostBookingStrategy { + core_saturation: true, + memory_saturation: false, + }, + frame_memory_soft_limit: 1.6, + frame_memory_hard_limit: 2.0, + }, + database: DatabaseConfig { + pool_size: 20, + core_multiplier: 100, + db_host: TEST_DB_HOST.to_string(), + db_name: TEST_DB_NAME.to_string(), + db_user: TEST_DB_USER.to_string(), + db_pass: TEST_DB_PASSWORD.to_string(), + db_port: TEST_DB_PORT, + }, + rqd: RqdConfig { + grpc_port: 8444, + dry_run_mode: true, // Always run in dry mode for tests + }, + host_cache: host_cache_config, + scheduler: SchedulerConfig::default(), + } +} + +#[derive(Debug)] +#[allow(dead_code)] +pub struct TestData { + pub test_prefix: String, + pub clusters: Vec, + pub jobs: Vec, + pub hosts: Vec, +} + +#[derive(Debug)] +#[allow(dead_code)] +pub struct TestHost { + id: Uuid, + name: String, + alloc_id: Uuid, +} + +#[derive(Debug)] +#[allow(dead_code)] +pub struct TestJob { + id: Uuid, + name: String, + layers: Vec, + frames_by_layer: usize, +} + +#[derive(Debug)] +#[allow(dead_code)] +struct TestLayer { + id: Uuid, + name: String, + tag: Vec, +} + +#[allow(dead_code)] +pub async fn clean_up_test_data(_test_prefix: &str) -> Result<(), sqlx::Error> { + // let pool = test_connection_pool().await?; + // let mut tx = pool.begin().await?; + + // // Delete proc (references frames) + // sqlx::query( + // "DELETE FROM proc WHERE pk_frame IN ( + // SELECT f.pk_frame FROM frame f + // JOIN layer l ON f.pk_layer = l.pk_layer + // JOIN job j ON f.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete frame_history (references frames) + // sqlx::query( + // "DELETE FROM frame_history WHERE pk_frame IN ( + // SELECT f.pk_frame FROM frame f + // JOIN layer l ON f.pk_layer = l.pk_layer + // JOIN job j ON f.pk_job = j.pk_job + // WHERE j.str_name 
LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete frames (references layers) + // sqlx::query( + // "DELETE FROM frame WHERE pk_frame IN ( + // SELECT f.pk_frame FROM frame f + // JOIN layer l ON f.pk_layer = l.pk_layer + // JOIN job j ON f.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layer_output (references layers) + // sqlx::query( + // "DELETE FROM layer_output WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layer_env (references layers) + // sqlx::query( + // "DELETE FROM layer_env WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layer_mem (references layers) + // sqlx::query( + // "DELETE FROM layer_mem WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layer_usage (references layers) + // sqlx::query( + // "DELETE FROM layer_usage WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layer_stat (references layers) + // sqlx::query( + // "DELETE FROM layer_stat WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // 
.execute(&mut *tx) + // .await?; + + // // Delete layer_resource (references layers) + // sqlx::query( + // "DELETE FROM layer_resource WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layer_history (references layers) + // sqlx::query( + // "DELETE FROM layer_history WHERE pk_layer IN ( + // SELECT l.pk_layer FROM layer l + // JOIN job j ON l.pk_job = j.pk_job + // WHERE j.str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete layers (references jobs) + // sqlx::query( + // "DELETE FROM layer WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_local (references jobs and hosts) + // sqlx::query( + // "DELETE FROM job_local WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_env (references jobs) + // sqlx::query( + // "DELETE FROM job_env WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_mem (references jobs) + // sqlx::query( + // "DELETE FROM job_mem WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_usage (references jobs) + // sqlx::query( + // "DELETE FROM job_usage WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_stat (references jobs) + // 
sqlx::query( + // "DELETE FROM job_stat WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_resource (references jobs) + // sqlx::query( + // "DELETE FROM job_resource WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_post (references jobs) + // sqlx::query( + // "DELETE FROM job_post WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete job_history (references jobs) + // sqlx::query( + // "DELETE FROM job_history WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete depend (references jobs) + // sqlx::query( + // "DELETE FROM depend WHERE pk_job_depend_on IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // ) OR pk_job_depend_er IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete comments (references jobs) + // sqlx::query( + // "DELETE FROM comments WHERE pk_job IN ( + // SELECT pk_job FROM job WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete jobs (references folders/shows/facilities/depts) + // sqlx::query("DELETE FROM job WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete host_local (references hosts) + // sqlx::query( + // "DELETE FROM host_local WHERE pk_host IN ( + // SELECT pk_host FROM host WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // 
.execute(&mut *tx) + // .await?; + + // // Delete host_tag (references hosts) + // sqlx::query( + // "DELETE FROM host_tag WHERE pk_host IN ( + // SELECT pk_host FROM host WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete host_stat (references hosts) + // sqlx::query( + // "DELETE FROM host_stat WHERE pk_host IN ( + // SELECT pk_host FROM host WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete deed (references hosts and owners) + // sqlx::query( + // "DELETE FROM deed WHERE pk_host IN ( + // SELECT pk_host FROM host WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete hosts (references allocations) + // sqlx::query("DELETE FROM host WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete owner (references shows) + // sqlx::query( + // "DELETE FROM owner WHERE pk_show IN ( + // SELECT pk_show FROM show WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete folder_resource (references folders) + // sqlx::query( + // "DELETE FROM folder_resource WHERE pk_folder IN ( + // SELECT pk_folder FROM folder WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete folders (references shows and depts) + // sqlx::query("DELETE FROM folder WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete subscriptions (references allocations and shows) + // sqlx::query( + // "DELETE FROM subscription WHERE pk_alloc IN ( + // SELECT pk_alloc FROM alloc WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + 
+ // // Delete allocations (references facilities) + // sqlx::query("DELETE FROM alloc WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete show_service (references shows) + // sqlx::query( + // "DELETE FROM show_service WHERE pk_show IN ( + // SELECT pk_show FROM show WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete show_alias (references shows) + // sqlx::query( + // "DELETE FROM show_alias WHERE pk_show IN ( + // SELECT pk_show FROM show WHERE str_name LIKE $1 + // )", + // ) + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete shows + // sqlx::query("DELETE FROM show WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete departments + // sqlx::query("DELETE FROM dept WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // // Delete facilities + // sqlx::query("DELETE FROM facility WHERE str_name LIKE $1") + // .bind(format!("{}%", test_prefix)) + // .execute(&mut *tx) + // .await?; + + // tx.commit().await?; + Ok(()) +} + +#[allow(dead_code)] +pub async fn create_test_data( + test_name: &str, + test_id: &str, + job_count: usize, + host_count: usize, + layer_count: usize, + frames_per_layer_count: usize, + tag_count: usize, +) -> Result { + // assert!(tag_count >= 4, "Minimum tag_count is 4"); + + // Create basic entities + let facility_id = Uuid::new_v4(); + let dept_id = Uuid::new_v4(); + let show_id = Uuid::new_v4(); + let pool = test_connection_pool().await?; + let test_prefix = format!("{}_{}", test_name, test_id); + + let mut tx = pool.begin().await?; + + // Create facility + sqlx::query("INSERT INTO facility (pk_facility, str_name) VALUES ($1, $2)") + .bind(facility_id.to_string()) + .bind(format!("{}_facility", test_prefix)) + .execute(&mut *tx) + 
.await?; + + // Create department + sqlx::query("INSERT INTO dept (pk_dept, str_name) VALUES ($1, $2)") + .bind(dept_id.to_string()) + .bind(format!("{}_dept", test_prefix)) + .execute(&mut *tx) + .await?; + + // Create show + sqlx::query("INSERT INTO show (pk_show, str_name) VALUES ($1, $2)") + .bind(show_id.to_string()) + .bind(format!("{}_show", test_prefix)) + .execute(&mut *tx) + .await?; + + // Manual Tags + let mut tags = Vec::new(); + let mut clusters = Vec::new(); + let mut tag_chunks: Vec> = Vec::new(); + if tag_count > 0 { + for i in 1..=tag_count { + tags.push(format!("{}_{}", test_prefix, i)); + } + + // Clusters. Chunk manual tags in approximately 4 groups + for chunk in tags.chunks(tags.len() / 4) { + let cluster = Cluster::TagsKey( + facility_id, + chunk + .iter() + .map(|tag_name| Tag { + name: tag_name.clone(), + ttype: TagType::Manual, + }) + .collect(), + ); + clusters.push(cluster); + } + + // Chunk tags to the number of hosts + let tags_per_chunk = tags.len().div_ceil(host_count); + tag_chunks = tags + .chunks(tags_per_chunk) + .map(|chunk| chunk.iter().map(|tag| (tag.clone(), "MANUAL")).collect()) + .collect(); + } + // Create allocations for different tag types + let alloc_tags = vec![ + format!("{}_a1", test_prefix), + format!("{}_a2", test_prefix), + format!("{}_a3", test_prefix), + ]; + let allocs = [ + create_allocation(&mut tx, facility_id, &format!("{}_a1", test_prefix)).await?, + create_allocation(&mut tx, facility_id, &format!("{}_a2", test_prefix)).await?, + create_allocation(&mut tx, facility_id, &format!("{}_a3", test_prefix)).await?, + ]; + + for (alloc_id, alloc_name) in allocs.iter() { + let cluster = Cluster::ComposedKey(ClusterKey { + facility_id, + show_id, + tag: Tag { + name: alloc_name.clone(), + ttype: TagType::Alloc, + }, + }); + clusters.push(cluster); + create_subscription(&mut tx, *alloc_id, show_id, 10000 * 100, 990000 * 100).await?; + } + + // Create folder + let folder_id = create_folder( + &mut tx, + show_id,
dept_id, + &format!("{}_folder", test_prefix), + ) + .await?; + + tx.commit().await?; + + let mut rng = StdRng::from_seed(SEED); + + // Create hosts + let mut hosts = Vec::new(); + for i in 0..host_count { + let (curr_alloc_id, curr_alloc_tag) = allocs.choose(&mut rng).unwrap(); + + let mut host_tags: Vec<_> = if !tag_chunks.is_empty() { + // Ensure each tag exists in at least one host + if i < tag_chunks.len() { + tag_chunks[i].clone() + } else { + // The following hosts shall have 0-2 randomly selected manual tags + let num_custom_tags = rng.gen_range(0..3); + tags.iter() + .choose_multiple(&mut rng, num_custom_tags) + .into_iter() + .map(|tag| (tag.clone(), "MANUAL")) + .collect() + } + } else { + Vec::new() + }; + // Each host shall have a single ALLOC tag + host_tags.push((curr_alloc_tag.clone(), "ALLOC")); + + let cores_range: Vec<_> = (16..=512).step_by(8).collect(); + let cores = *cores_range.choose(&mut rng).unwrap(); + let memory = rng.gen_range(30..=200); + + let host = create_host( + *curr_alloc_id, + &format!("{}_host{}", test_prefix, i), + cores, + memory * 1024 * 1024, + 4, + 8 * 1024 * 1024, + host_tags, + ) + .await?; + hosts.push(host); + } + + // Create Jobs + let mut jobs = Vec::new(); + tags.extend(alloc_tags); + for i in 0..job_count { + let job = create_job_scenario( + &format!("{}_{}", test_prefix, i), + show_id, + facility_id, + dept_id, + folder_id, + layer_count, + frames_per_layer_count, + &tags, + ) + .await?; + jobs.push(job); + } + + Ok(TestData { + test_prefix: test_prefix.to_string(), + clusters, + jobs, + hosts, + }) +} + +#[allow(dead_code)] +async fn create_allocation( + pool: &mut Transaction<'static, Postgres>, + facility_id: Uuid, + name: &str, +) -> Result<(Uuid, String), sqlx::Error> { + let alloc_id = Uuid::new_v4(); + + sqlx::query( + "INSERT INTO alloc (pk_alloc, str_name, pk_facility, str_tag) VALUES ($1, $2, $3, $4)", + ) + .bind(alloc_id.to_string()) + .bind(name) + .bind(facility_id.to_string()) + .bind(name)
.execute(&mut **pool) + .await?; + Ok((alloc_id, name.to_string())) +} + +async fn create_subscription( + pool: &mut Transaction<'static, Postgres>, + alloc_id: Uuid, + show_id: Uuid, + size: i64, + burst: i64, +) -> Result<(), sqlx::Error> { + let subscription_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO subscription \ + (pk_subscription, pk_alloc, pk_show, int_size, int_burst) \ + VALUES ($1, $2, $3, $4, $5)", + ) + .bind(subscription_id.to_string()) + .bind(alloc_id.to_string()) + .bind(show_id.to_string()) + .bind(size) + .bind(burst) + .execute(&mut **pool) + .await?; + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn create_host( + alloc_id: Uuid, + name: &str, + cores: i64, + memory_kb: i64, + gpus: i64, + gpu_memory_kb: i64, + tags: Vec<(String, &str)>, +) -> Result { + let pool = test_connection_pool().await?; + let host_id = Uuid::new_v4(); + + // Create host + sqlx::query( + "INSERT INTO host \ + (pk_host, pk_alloc, str_name, str_lock_state, int_cores, int_cores_idle, int_mem, int_mem_idle, int_gpus, int_gpus_idle, int_gpu_mem, int_gpu_mem_idle, int_thread_mode) \ + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)" + ) + .bind(host_id.to_string()) + .bind(alloc_id.to_string()) + .bind(name) + .bind("OPEN") + .bind(cores * 100) // Core multiplier + .bind(cores * 100) + .bind(memory_kb) + .bind(memory_kb) + .bind(gpus) + .bind(gpus) + .bind(gpu_memory_kb) + .bind(gpu_memory_kb) + .bind(0) // ThreadMode::Auto + .execute(&*pool) + .await?; + + // Create host_stat + let host_stat_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO host_stat (pk_host_stat, pk_host, str_state, str_os, int_mem_total, int_mem_free, int_gpu_mem_total, int_gpu_mem_free) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)" + ) + .bind(host_stat_id.to_string()) + .bind(host_id.to_string()) + .bind("UP") + .bind("linux") + .bind(memory_kb) + .bind(memory_kb) + .bind(gpu_memory_kb) + .bind(gpu_memory_kb) + .execute(&*pool) + .await?; + + // Create host tags + for 
(tag_name, tag_type) in tags { + let tag_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO host_tag (pk_host_tag, pk_host, str_tag, str_tag_type) VALUES ($1, $2, $3, $4)" + ) + .bind(tag_id.to_string()) + .bind(host_id.to_string()) + .bind(tag_name) + .bind(tag_type) + .execute(&*pool) + .await?; + } + + Ok(TestHost { + id: host_id, + name: name.to_string(), + alloc_id, + }) +} + +async fn create_folder( + pool: &mut Transaction<'static, Postgres>, + show_id: Uuid, + dept_id: Uuid, + name: &str, +) -> Result { + let folder_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO folder (pk_folder, pk_show, pk_dept, str_name) VALUES ($1, $2, $3, $4)", + ) + .bind(folder_id.to_string()) + .bind(show_id.to_string()) + .bind(dept_id.to_string()) + .bind(name) + .execute(&mut **pool) + .await?; + Ok(folder_id) +} + +#[allow(clippy::too_many_arguments)] +async fn create_job_scenario( + job_prefix: &str, + show_id: Uuid, + facility_id: Uuid, + dept_id: Uuid, + folder_id: Uuid, + layer_count: usize, + frames_per_layer_count: usize, + tags: &[String], +) -> Result { + let pool = test_connection_pool().await?; + let mut tx = pool.begin().await?; + let job_id = Uuid::new_v4(); + let job_name = format!("{}_job", job_prefix); + + // Create job + sqlx::query( + "INSERT INTO job (pk_job, pk_folder, pk_show, pk_facility, pk_dept, str_name, str_visible_name, str_shot, str_user, str_state, str_os) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)" + ) + .bind(job_id.to_string()) + .bind(folder_id.to_string()) + .bind(show_id.to_string()) + .bind(facility_id.to_string()) + .bind(dept_id.to_string()) + .bind(&job_name) + .bind(&job_name) + .bind(format!("integ_test_shot_{}", job_name.split('_').next_back().unwrap_or("default"))) + .bind(format!("integ_test_user_{}", job_name.split('_').next_back().unwrap_or("default"))) + .bind("PENDING") + .bind("linux") + .execute(&mut *tx) + .await?; + + // Create job stats with waiting frames + // First check if job_stat already exists for this job 
(might be created by triggers) + let existing_job_stat = + sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM job_stat WHERE pk_job = $1") + .bind(job_id.to_string()) + .fetch_one(&mut *tx) + .await?; + + let total_waiting_frames = layer_count * frames_per_layer_count; + if existing_job_stat == 0 { + sqlx::query( + "INSERT INTO job_stat (pk_job_stat, pk_job, int_waiting_count) VALUES ($1, $2, $3)", + ) + .bind(Uuid::new_v4().to_string()) + .bind(job_id.to_string()) + .bind(total_waiting_frames as i64) + .execute(&mut *tx) + .await?; + } else { + // Update existing job_stat + sqlx::query("UPDATE job_stat SET int_waiting_count = $1 WHERE pk_job = $2") + .bind(total_waiting_frames as i64) + .bind(job_id.to_string()) + .execute(&mut *tx) + .await?; + } + + // Create job resource + // First check if job_resource already exists for this job (might be created by triggers) + let existing_job_resource = + sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM job_resource WHERE pk_job = $1") + .bind(job_id.to_string()) + .fetch_one(&mut *tx) + .await?; + + if existing_job_resource == 0 { + sqlx::query( + "INSERT INTO job_resource (pk_job_resource, pk_job, int_priority, int_max_cores) VALUES ($1, $2, $3, $4)", + ) + .bind(Uuid::new_v4().to_string()) + .bind(job_id.to_string()) + .bind(1) + .bind(90000) + .execute(&mut *tx) + .await?; + } else { + // Update existing job_resource + sqlx::query( + "UPDATE job_resource SET int_priority = $1, int_max_cores = $2 WHERE pk_job = $3", + ) + .bind(1) + .bind(90000) + .bind(job_id.to_string()) + .execute(&mut *tx) + .await?; + } + + let mut rng = StdRng::from_seed(SEED); + let mut test_layers = Vec::new(); + + for layer_index in 0..layer_count { + let layer_id = Uuid::new_v4(); + let layer_name = &format!("{}_layer-{}", job_prefix, layer_index); + + let num_tags = rng.gen_range(1..=3); + let layer_tags: Vec<_> = tags.choose_multiple(&mut rng, num_tags).cloned().collect(); + let cores_range: Vec = (4..=56).step_by(4).collect(); + let 
min_cores: usize = *cores_range.choose(&mut rng).unwrap(); + let memory = rng.gen_range(4..=32); + + // &format!("integ_test_mixed_hostname_{}", test_suffix), + // &format!("integ_test_hostname_tag_{}", test_suffix), + // 1, + // 1024 * 1024, + // 0, + // 0, + // Create layer + sqlx::query( + "INSERT INTO layer \ + (pk_layer, pk_job, str_name, str_cmd, str_range, str_tags, str_type, int_cores_min, int_mem_min, int_gpus_min, int_gpu_mem_min) \ + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)" + ) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .bind(layer_name) + .bind("echo 'Integration test frame'") + .bind(format!("1-{}", frames_per_layer_count)) + .bind(layer_tags.join(" | ")) + .bind("PRE") // Default layer type + .bind(min_cores as i64 * 100) // Core multiplier + .bind(memory * 1024 * 1024) + .bind(0) + .bind(0) + .execute(&mut *tx) + .await?; + + // Create layer stats + sqlx::query( + "INSERT INTO layer_stat (pk_layer_stat, pk_layer, pk_job, int_waiting_count, int_total_count) VALUES ($1, $2, $3, $4, $5) ON CONFLICT (pk_layer) DO UPDATE SET int_waiting_count = EXCLUDED.int_waiting_count, int_total_count = EXCLUDED.int_total_count" + ) + .bind(Uuid::new_v4().to_string()) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .bind(frames_per_layer_count as i64) + .bind(frames_per_layer_count as i64) + .execute(&mut *tx) + .await?; + + // Create layer resource + // Check if layer_resource already exists for this layer (might be created by triggers) + let existing_layer_resource = + sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM layer_resource WHERE pk_layer = $1") + .bind(layer_id.to_string()) + .fetch_one(&mut *tx) + .await?; + + if existing_layer_resource == 0 { + sqlx::query( + "INSERT INTO layer_resource (pk_layer_resource, pk_layer, pk_job) VALUES ($1, $2, $3)" + ) + .bind(Uuid::new_v4().to_string()) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .execute(&mut *tx) + .await?; + } + + // Create frames (1-3) + for 
frame_num in 1..=frames_per_layer_count as i32 { + let frame_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO frame (pk_frame, pk_layer, pk_job, str_name, str_state, int_number, int_layer_order, int_dispatch_order, ts_updated) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, CURRENT_TIMESTAMP)" + ) + .bind(frame_id.to_string()) + .bind(layer_id.to_string()) + .bind(job_id.to_string()) + .bind(format!("{}-{}", frame_num, layer_name)) + .bind("WAITING") + .bind(frame_num) + .bind(frame_num) + .bind(frame_num) + .execute(&mut *tx) + .await?; + } + + test_layers.push(TestLayer { + id: layer_id, + name: layer_name.to_string(), + tag: layer_tags, + }); + } + tx.commit().await?; + + Ok(TestJob { + id: job_id, + name: job_name.to_string(), + layers: test_layers, + frames_by_layer: frames_per_layer_count, + }) +} + +#[allow(dead_code)] +pub enum WaitingFrameClause { + JobId(Uuid), + All, + JobPrefix(String), +} + +#[allow(dead_code)] +pub async fn get_waiting_frames_count(clause: WaitingFrameClause) -> usize { + let pool = test_connection_pool().await.unwrap(); + match clause { + WaitingFrameClause::JobId(job_id) => { + sqlx::query_scalar::<_, i64>("SELECT int_waiting_count FROM job_stat WHERE pk_job = $1") + .bind(job_id.to_string()) + .fetch_one(&*pool) + .await + .expect("Failed to query job stats") as usize + } + WaitingFrameClause::All => { + sqlx::query_scalar::<_, i32>("SELECT sum(int_waiting_count)::INTEGER FROM job_stat") + .fetch_one(&*pool) + .await + .expect("Failed to query job stats") as usize + } + WaitingFrameClause::JobPrefix(prefix) => sqlx::query_scalar::<_, i32>( + "SELECT sum(job_stat.int_waiting_count)::INTEGER \ + FROM job_stat JOIN job ON job.pk_job = job_stat.pk_job \ + WHERE job.str_name LIKE $1", + ) + .bind(format!("{}%", prefix)) + .fetch_one(&*pool) + .await + .expect("Failed to query job stats") + as usize, + } +}