# Aibrix Integration with Envoy AI Gateway Deployment Guide

This guide walks you through deploying a multi-model AI inference gateway using **Envoy AI Gateway**, **Gateway API Inference Extension**, and custom Aibrix-branded routing rules.

### Project Structure

```bash
samples/ai-gateway-integration
├── gateway.yaml                    # GatewayClass + Gateway
├── aigatewayroute.yaml             # Multi-model routing rules (llama2-7b, mistral-7b)
├── llama-7b-inferencepool.yaml     # InferencePool + EPP for Llama2-7B
├── mistral-7b-inferencepool.yaml   # InferencePool + EPP for Mistral-7B
├── llama-7b.yaml                   # Mock llama2-7b model deployment
└── mistral-7b.yaml                 # Mock mistral-7b model deployment
```
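
For orientation, here is a minimal sketch of what `gateway.yaml` could contain: a GatewayClass bound to the Envoy Gateway controller plus a Gateway with an HTTP listener. The Envoy pod name shown later in this guide (`envoy-default-aibrix-ai-gateway-588291e8-...`) suggests a Gateway named `aibrix-ai-gateway` in the `default` namespace; the listener details below are assumptions, not the shipped file.

```yaml
# Illustrative sketch only -- check against the actual gateway.yaml in this directory.
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: aibrix-ai-gateway
spec:
  # Standard controller name used by Envoy Gateway.
  controllerName: gateway.envoyproxy.io/gatewayclass-controller
---
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: aibrix-ai-gateway
  namespace: default
spec:
  gatewayClassName: aibrix-ai-gateway
  listeners:
    - name: http       # listener name/port are assumptions
      protocol: HTTP
      port: 80
```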

### Prerequisites

- Kubernetes cluster (v1.24+)
- kubectl configured
- helm v3.8+
- Internet access to pull images from docker.io and GitHub

### Installation Steps

1. Install Aibrix Custom Application (Optional)

If you have an internal Aibrix [Helm chart](../../dist/chart):

```bash
helm install aibrix dist/chart -n aibrix-system --create-namespace
```

> **Note**: If you are using the internal Aibrix Helm chart, **you must set `gateway.enable: false`** in `values.yaml`.
> This is critical because **Steps 2–5 below install the AI Gateway controller and Envoy data plane independently**.
> Enabling the built-in gateway here would cause resource conflicts or duplicate deployments.

```yaml
...
gateway:
  enable: false  # ← Set this to false to skip internal gateway deployment
...
```

2. Install AI Gateway CRDs

```bash
helm upgrade -i aieg-crd oci://docker.io/envoyproxy/ai-gateway-crds-helm \
  --version v0.0.0-latest \
  --namespace envoy-ai-gateway-system \
  --create-namespace
```

> For more details, see the official [installation guide](https://aigateway.envoyproxy.io/docs/getting-started/installation#step-1-install-ai-gateway-crds) for AI Gateway CRDs.

3. Install AI Gateway Controller

```bash
helm upgrade -i aieg oci://docker.io/envoyproxy/ai-gateway-helm \
  --version v0.0.0-latest \
  --namespace envoy-ai-gateway-system \
  --create-namespace
```

> For more details, see the official [installation guide](https://aigateway.envoyproxy.io/docs/getting-started/installation#step-2-install-ai-gateway-resources) for AI Gateway Resources.

Wait for the controller to be ready:

```bash
kubectl wait --timeout=2m -n envoy-ai-gateway-system deployment/ai-gateway-controller --for=condition=Available
```

4. Install Gateway API Inference Extension (EPP Framework)

```bash
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml
```

> For more details, see the official [installation guide](https://aigateway.envoyproxy.io/docs/capabilities/inference/httproute-inferencepool#step-1-install-gateway-api-inference-extension) for Gateway API Inference Extension.

This deploys:
- CRDs (`InferencePool`, `InferenceObjective`)
- RBAC, webhooks, and core controllers

5. Install Envoy Gateway (Data Plane)

```bash
helm upgrade -i eg oci://docker.io/envoyproxy/gateway-helm \
  --version v0.0.0-latest \
  --namespace envoy-gateway-system \
  --create-namespace \
  -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-values.yaml \
  -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/inference-pool/envoy-gateway-values-addon.yaml
```

> For more details, see the official [installation guide](https://aigateway.envoyproxy.io/docs/getting-started/prerequisites#additional-features-rate-limiting-inferencepool-etc) for Envoy Gateway.

6. Deploy Aibrix AI Gateway Resources

Apply your custom gateway and routing configuration:

```bash
cd samples/ai-gateway-integration

# Deploy the mock model servers
kubectl apply -f llama-7b.yaml
kubectl apply -f mistral-7b.yaml

# Deploy GatewayClass, Gateway, and AIGatewayRoute
kubectl apply -f gateway.yaml
kubectl apply -f aigatewayroute.yaml

# Deploy backend resources (InferencePool + EPP) for each model
kubectl apply -f llama-7b-inferencepool.yaml
kubectl apply -f mistral-7b-inferencepool.yaml
```
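
An InferencePool, such as the one defined in `llama-7b-inferencepool.yaml`, ties a set of model-server pods to a serving port and an endpoint-picker (EPP) service. The sketch below is illustrative only, written against the v1 schema from the extension's GA release; the selector labels and port are assumptions, and field names can differ between releases, so defer to the actual file.

```yaml
# Illustrative sketch; labels, port, and schema details are assumptions.
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
  name: llama2-7b
spec:
  targetPorts:
    - number: 8000            # port the mock model pods serve on (assumed)
  selector:
    matchLabels:
      app: mock-llama2-7b     # assumed label on the mock deployment
  endpointPickerRef:
    name: llama2-7b-epp       # EPP service, matching the EPP pod shown below
```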

### Verify Deployment Status

After installation, you can verify that all components are running correctly. Below is example output from a successful deployment:

- Pods in `aibrix-system`

```bash
$ kubectl get pods -n aibrix-system
NAME                                         READY   STATUS    RESTARTS   AGE
aibrix-controller-manager-7dcf4b8d97-9mgw8   1/1     Running   0          3h35m
aibrix-gpu-optimizer-556d946fbb-gzh85        1/1     Running   0          3h35m
aibrix-metadata-service-bdfd4459d-678k5      1/1     Running   0          3h35m
aibrix-redis-master-74945dc65d-sr2sq         1/1     Running   0          3h35m
```

- Pods in `envoy-ai-gateway-system`

```bash
$ kubectl get pods -n envoy-ai-gateway-system
NAME                                     READY   STATUS    RESTARTS   AGE
ai-gateway-controller-5558c7cf7c-bzh65   1/1     Running   0          3h34m
```

- Pods in `envoy-gateway-system`

```bash
$ kubectl get pods -n envoy-gateway-system
NAME                                                       READY   STATUS    RESTARTS   AGE
envoy-default-aibrix-ai-gateway-588291e8-54d5f9b6f-2psp6   3/3     Running   0          128m
envoy-gateway-6dd8f9b8f-kjngn                              1/1     Running   0          3h33m
```

- AI Gateway CRDs

```bash
$ kubectl get InferencePool
NAME         AGE
llama2-7b    121m
mistral-7b   121m

$ kubectl get InferenceObjective
NAME         INFERENCE POOL   PRIORITY   AGE
llama2-7b    llama2-7b        10         121m
mistral-7b   mistral-7b       10         121m
```
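
Each `InferenceObjective` shown above attaches a priority to one pool. A minimal sketch consistent with that output follows; the apiVersion is an assumption (this resource has not graduated to v1 in all releases), so verify it against the CRDs you actually installed.

```yaml
# Illustrative sketch; confirm the apiVersion with `kubectl api-resources`.
apiVersion: inference.networking.k8s.io/v1alpha2
kind: InferenceObjective
metadata:
  name: llama2-7b
spec:
  priority: 10        # matches the PRIORITY column above
  poolRef:
    name: llama2-7b   # matches the INFERENCE POOL column above
```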

- Model and EPP Backend Pods (in default namespace)

```bash
$ kubectl get pods
NAME                               READY   STATUS    RESTARTS   AGE
llama2-7b-epp-6fb99fd7df-7xlxq     1/1     Running   0          121m
mistral-7b-epp-7c7f7fcb66-bw87d    1/1     Running   0          121m
mock-llama2-7b-6444f9b459-7gzmx    1/1     Running   0          131m
mock-llama2-7b-6444f9b459-92bsl    1/1     Running   0          131m
mock-llama2-7b-6444f9b459-krj8c    1/1     Running   0          131m
mock-mistral-7b-5fddcff595-5268f   1/1     Running   0          131m
mock-mistral-7b-5fddcff595-t65cp   1/1     Running   0          131m
```

### Test the Setup

Once all pods are ready, test routing via curl:

- Llama2-7B

```bash
curl -v http://<GATEWAY_IP>/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "x-ai-eg-model: llama2-7b" \
  -H "Authorization: Bearer test-key-1234567890" \
  -d '{
    "model": "llama2-7b",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
```

- Mistral-7B

```bash
curl -v http://<GATEWAY_IP>/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "x-ai-eg-model: mistral-7b" \
  -H "Authorization: Bearer test-key-0987654321" \
  -d '{
    "model": "mistral-7b",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
```

Replace `<GATEWAY_IP>` with:

- `localhost:8080`, if you forward a local port to the Envoy Service:

```bash
kubectl port-forward -n envoy-gateway-system svc/eg-envoy 8080:80
```

- the external IP of the `eg-envoy` Service, if it is exposed via a LoadBalancer.
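
To sanity-check a reply, you can pull the assistant message out of the JSON body. The snippet below parses a canned response in the OpenAI-compatible chat-completions shape the gateway is expected to return; the sample payload itself is fabricated for illustration.

```bash
# Canned response in the OpenAI chat-completions shape (made up for illustration).
RESPONSE='{"id":"cmpl-1","model":"llama2-7b","choices":[{"message":{"role":"assistant","content":"This is a test!"}}]}'

# Extract choices[0].message.content with python3 (avoids a jq dependency).
echo "$RESPONSE" | python3 -c 'import json,sys; print(json.load(sys.stdin)["choices"][0]["message"]["content"])'
```

In practice, capture the real payload instead, e.g. `RESPONSE=$(curl -s ...)` with one of the curl commands above, and run the same parser on it.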

### References

- [Envoy AI Gateway](https://github.com/envoyproxy/ai-gateway)
- [Gateway API Inference Extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension)