diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel
index e47e0426e05..0a8bc2e4d1a 100644
--- a/src/workerd/api/BUILD.bazel
+++ b/src/workerd/api/BUILD.bazel
@@ -440,14 +440,15 @@ wd_cc_library(
         "encoding-shared.h",
     ],
     implementation_deps = [
-        "//src/workerd/io:features",
         "//src/workerd/util:strings",
+        "@simdutf",
     ],
     visibility = ["//visibility:public"],
     deps = [
         ":util",
         "//src/rust/encoding",
         "//src/workerd/io:compatibility-date_capnp",
+        "//src/workerd/io:features",
         "//src/workerd/jsg",
         "@capnp-cpp//src/kj",
         "@simdutf",
@@ -624,6 +625,14 @@ kj_test(
     ],
 )
 
+kj_test(
+    src = "encoding-test.c++",
+    deps = [
+        ":encoding",
+        "//src/workerd/io",
+    ],
+)
+
 kj_test(
     src = "base64-test.c++",
     deps = ["//src/workerd/tests:test-fixture"],
diff --git a/src/workerd/api/encoding-test.c++ b/src/workerd/api/encoding-test.c++
new file mode 100644
index 00000000000..ed1b7fb6593
--- /dev/null
+++ b/src/workerd/api/encoding-test.c++
@@ -0,0 +1,90 @@
+// Copyright (c) 2025 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+#include "encoding.h"
+
+#include <kj/test.h>
+
+namespace workerd::api {
+namespace test {
+
+// These tests verify the findBestFit() function used by TextEncoder.encodeInto().
+//
+// bestFit(input, bufferSize) returns the number of input code units that can be
+// fully converted to UTF-8 and fit within the given output buffer size in bytes.
+//
+// Different characters expand to different UTF-8 byte lengths:
+//   - ASCII (U+0000-U+007F): 1 byte per code unit
+//   - Latin-1 extended (U+0080-U+00FF): 2 bytes per code unit
+//   - BMP characters (U+0100-U+FFFF): 2-3 bytes per code unit
+//   - Supplementary characters (U+10000+): 4 bytes, encoded as surrogate pairs in UTF-16
+//
+// The function must never split a surrogate pair, so if there's only room for part of
+// a multi-byte character, it stops before that character.
+KJ_TEST("BestFitASCII") {
+  // If there's zero input or output space, the answer is zero.
+  KJ_ASSERT(bestFit("", 0) == 0);
+  KJ_ASSERT(bestFit("a", 0) == 0);
+  KJ_ASSERT(bestFit("aa", 0) == 0);
+  KJ_ASSERT(bestFit("aaa", 0) == 0);
+  KJ_ASSERT(bestFit("aaaa", 0) == 0);
+  KJ_ASSERT(bestFit("aaaaa", 0) == 0);
+  KJ_ASSERT(bestFit("", 0) == 0);
+  KJ_ASSERT(bestFit("", 1) == 0);
+  KJ_ASSERT(bestFit("", 2) == 0);
+  KJ_ASSERT(bestFit("", 3) == 0);
+  KJ_ASSERT(bestFit("", 4) == 0);
+  KJ_ASSERT(bestFit("", 5) == 0);
+  // Zero cases with two-byte strings.
+  KJ_ASSERT(bestFit(u"", 0) == 0);
+  KJ_ASSERT(bestFit(u"€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€€€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"", 0) == 0);
+  KJ_ASSERT(bestFit(u"", 1) == 0);
+  KJ_ASSERT(bestFit(u"", 2) == 0);
+  KJ_ASSERT(bestFit(u"", 3) == 0);
+  KJ_ASSERT(bestFit(u"", 4) == 0);
+  KJ_ASSERT(bestFit(u"", 5) == 0);
+  // Small buffers that only just fit.
+  KJ_ASSERT(bestFit(u"a", 1) == 1);
+  KJ_ASSERT(bestFit(u"å", 2) == 1);
+  KJ_ASSERT(bestFit(u"€", 3) == 1);
+  KJ_ASSERT(bestFit(u"😹", 4) == 2);
+  // Small buffers that don't fit.
+  KJ_ASSERT(bestFit(u"å", 1) == 0);
+  KJ_ASSERT(bestFit(u"€", 2) == 0);
+  KJ_ASSERT(bestFit(u"😹", 3) == 0);
+  // Don't chop a surrogate pair.
+  KJ_ASSERT(bestFit(u"1😹", 4) == 1);
+  KJ_ASSERT(bestFit(u"12😹", 5) == 2);
+  KJ_ASSERT(bestFit(u"123😹", 6) == 3);
+  KJ_ASSERT(bestFit(u"1234😹", 7) == 4);
+  KJ_ASSERT(bestFit(u"12345😹", 8) == 5);
+  // Some bigger ones just for fun.
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 0) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 1) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 2) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 3) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 4) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 5) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 6) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 7) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 8) == 4);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 9) == 4);
+  KJ_ASSERT(bestFit(u"0😹😹😹😹😹😹", 9) == 5);          // 0😹😹 is 5 and takes 9.
+  KJ_ASSERT(bestFit(u"01😹😹😹😹😹😹", 9) == 4);         // 01😹 is 4 and takes 6.
+  KJ_ASSERT(bestFit(u"012😹😹😹😹😹😹", 9) == 5);        // 012😹 is 5 and takes 7.
+  KJ_ASSERT(bestFit(u"0123😹😹😹😹😹😹", 9) == 6);       // 0123😹 is 6 and takes 8.
+  KJ_ASSERT(bestFit(u"01234😹😹😹😹😹😹", 9) == 7);      // 01234😹 is 7 and takes 9.
+  KJ_ASSERT(bestFit(u"012345😹😹😹😹😹😹", 9) == 6);     // 012345 is 6 and takes 6.
+  KJ_ASSERT(bestFit(u"0123456😹😹😹😹😹😹", 9) == 7);    // 0123456 is 7 and takes 7.
+  KJ_ASSERT(bestFit(u"01234567😹😹😹😹😹😹", 9) == 8);   // 01234567 is 8 and takes 8.
+  KJ_ASSERT(bestFit(u"012345678😹😹😹😹😹😹", 9) == 9);  // 012345678 is 9 and takes 9.
+}
+
+}  // namespace test
+}  // namespace workerd::api
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 958f928f7e4..19f7712e52a 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -9,12 +9,15 @@
 
 #include <workerd/io/features.h>
 #include <workerd/jsg/jsg.h>
+#include <workerd/util/autogate.h>
 #include <workerd/util/strings.h>
 
 #include <unicode/ucnv.h>
 #include <unicode/utf8.h>
+#include <v8.h>
 
 #include <kj/array.h>
+#include <kj/string.h>
 
 namespace workerd::api {
 
@@ -274,6 +277,9 @@ Encoding getEncodingForLabel(kj::StringPtr label) {
 #undef V
   return Encoding::INVALID;
 }
+
+constexpr int MAX_SIZE_FOR_STACK_ALLOC = 4096;
+
 }  // namespace
 
 const kj::Array<const kj::byte> TextDecoder::EMPTY =
@@ -516,34 +522,261 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
   return js.alloc<TextEncoder>();
 }
 
+jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
+  if (!workerd::util::Autogate::isEnabled(workerd::util::AutogateKey::ENABLE_FAST_TEXTENCODER)) {
+    auto str = input.orDefault(js.str());
+    auto view = JSG_REQUIRE_NONNULL(jsg::BufferSource::tryAlloc(js, str.utf8Length(js)), RangeError,
+        "Cannot allocate space for TextEncoder.encode");
+    [[maybe_unused]] auto result = str.writeInto(
+        js, view.asArrayPtr().asChars(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+    KJ_DASSERT(result.written == view.size());
+    return jsg::JsUint8Array(view.getHandle(js).As<v8::Uint8Array>());
+  }
+
+  jsg::JsString str = input.orDefault(js.str());
+
+  size_t utf8_length = 0;
+  auto length = str.length(js);
+
+#ifdef KJ_DEBUG
+  bool wasAlreadyFlat = str.isFlat();
+  KJ_DEFER({ KJ_ASSERT(wasAlreadyFlat || !str.isFlat()); });
+#endif
+
+  // Note: writeInto() doesn't flatten the string - it calls writeTo() which chains through
+  // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat.
+  // This means we may read from multiple string segments, but that's fine for our use case.
+
+  if (str.isOneByte(js)) {
+    // Use off-heap allocation for intermediate Latin-1 buffer to avoid wasting V8 heap space
+    // and potentially triggering GC. Stack allocation for small strings, heap for large.
+    kj::SmallArray<kj::byte, MAX_SIZE_FOR_STACK_ALLOC> latin1Buffer(length);
+
+    [[maybe_unused]] auto writeResult = str.writeInto(js, latin1Buffer.asPtr());
+    KJ_DASSERT(
+        writeResult.written == length, "writeInto must completely overwrite the backing buffer");
+
+    utf8_length = simdutf::utf8_length_from_latin1(
+        reinterpret_cast<const char*>(latin1Buffer.begin()), length);
+
+    auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+    if (utf8_length == length) {
+      // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
+      kj::arrayPtr(static_cast<kj::byte*>(backingStore->Data()), length).copyFrom(latin1Buffer);
+    } else {
+      [[maybe_unused]] auto written =
+          simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(latin1Buffer.begin()),
+              length, reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(utf8_length == written);
+    }
+    return jsg::JsUint8Array::create(js, kj::mv(backingStore), 0, utf8_length);
+  }
+
+  // Use off-heap allocation for intermediate UTF-16 buffer to avoid wasting V8 heap space
+  // and potentially triggering GC. Stack allocation for small strings, heap for large.
+  // The copy is mutable so that unpaired surrogates can be fixed in place below.
+  kj::SmallArray<uint16_t, MAX_SIZE_FOR_STACK_ALLOC> utf16Buffer(length);
+
+  [[maybe_unused]] auto writeResult = str.writeInto(js, utf16Buffer.asPtr());
+  KJ_DASSERT(
+      writeResult.written == length, "writeInto must completely overwrite the backing buffer");
+
+  auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
+  auto lengthResult = simdutf::utf8_length_from_utf16_with_replacement(data, length);
+  utf8_length = lengthResult.count;
+
+  if (lengthResult.error == simdutf::SURROGATE) {
+    // If there are surrogates there may be unpaired surrogates. Fix them.
+    simdutf::to_well_formed_utf16(data, length, data);
+  } else {
+    KJ_DASSERT(lengthResult.error == simdutf::SUCCESS);
+  }
+
+  auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+  [[maybe_unused]] auto written =
+      simdutf::convert_utf16_to_utf8(data, length, reinterpret_cast<char*>(backingStore->Data()));
+  KJ_DASSERT(written == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
+
+  return jsg::JsUint8Array::create(js, kj::mv(backingStore), 0, utf8_length);
+}
+
 namespace {
-TextEncoder::EncodeIntoResult encodeIntoImpl(
-    jsg::Lock& js, jsg::JsString input, jsg::BufferSource& buffer) {
-  auto result = input.writeInto(
-      js, buffer.asArrayPtr().asChars(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
-  return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(result.read),
-    .written = static_cast<int>(result.written),
-  };
+
+constexpr bool isSurrogatePair(uint16_t lead, uint16_t trail) {
+  // simdutf::trim_partial_utf16 is not guaranteed to work right on invalid UTF-16, so chunk
+  // trimming uses this explicit check instead. The top six bits identify the surrogate kind:
+  // 0x36 is a leading surrogate (0xD800-0xDBFF), 0x37 a trailing one (0xDC00-0xDFFF).
+  return (lead >> 10) == 0x36 && (trail >> 10) == 0x37;
+}
+
+// UTF-8 length of one code unit (1, 2 or 3 bytes). Conservatively counts each surrogate as 3.
+constexpr size_t simpleUtfEncodingLength(uint16_t c) {
+  return 1 + (c >= 0x80) + (c >= 0x800);
 }
+
+// Returns how many of the `length` UTF-16 or Latin-1 code units at `data` can be converted to
+// UTF-8 without exceeding `bufferSize` bytes. Works even on malformed UTF-16. May conservatively
+// underestimate the largest number of code units we can fit because of undetected surrogate
+// pairs on boundaries.
+template <typename Char>
+size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+  // The SIMD is more efficient with a size that's a little over a multiple of 16.
+  constexpr size_t CHUNK = 257;
+  constexpr bool UTF16 = sizeof(Char) == 2;
+  // The max number of UTF-8 output bytes per input code unit.
+  constexpr size_t MAX_FACTOR = UTF16 ? 3 : 2;
+
+  // Our initial guess at how much the number of elements expands in the
+  // conversion to UTF-8.
+  double expansion = 1.15;
+
+  while (pos < length && utf8Accumulated < bufferSize) {
+    size_t remainingInput = length - pos;
+    size_t spaceRemaining = bufferSize - utf8Accumulated;
+    KJ_DASSERT(expansion >= 1.15);
+
+    // We estimate how many characters are likely to fit in the buffer, but
+    // only try for CHUNK characters at a time to minimize the worst case
+    // waste of time if we guessed too high.
+    size_t guaranteedToFit = spaceRemaining / MAX_FACTOR;
+    if (guaranteedToFit >= remainingInput) {
+      // Don't even bother checking any more, it's all going to fit.  Hitting
+      // this halfway through is also a good reason to limit the CHUNK size.
+      return length;
+    }
+    size_t likelyToFit = kj::min(static_cast<size_t>(spaceRemaining / expansion), CHUNK);
+    size_t fitEstimate = kj::max(1, kj::max(guaranteedToFit, likelyToFit));
+    size_t chunkSize = kj::min(remainingInput, fitEstimate);
+    if (chunkSize == 1) break;  // Not worth running this complicated stuff one char at a time.
+    // No div-by-zero because remainingInput and fitEstimate are at least 1.
+    KJ_DASSERT(chunkSize >= 1);
+
+    size_t chunkUtf8Len;
+    if constexpr (UTF16) {
+      chunkUtf8Len = simdutf::utf8_length_from_utf16_with_replacement(data + pos, chunkSize).count;
+    } else {
+      chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
+    }
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // Our chosen chunk didn't fit in the rest of the output buffer.
+      KJ_DASSERT(chunkSize > guaranteedToFit);
+      // Since it didn't fit we adjust our expansion guess upwards.
+      expansion = kj::max(expansion * 1.1, (chunkUtf8Len * 1.1) / chunkSize);
+    } else {
+      // Use successful length calculation to adjust our expansion estimate.
+      expansion = kj::max(1.15, (chunkUtf8Len * 1.1) / chunkSize);
+      pos += chunkSize;
+      utf8Accumulated += chunkUtf8Len;
+    }
+  }
+  // Do the last few code units one at a time; the mask stops a signed char sign-extending.
+  while (pos < length && utf8Accumulated < bufferSize) {
+    size_t extra = simpleUtfEncodingLength(data[pos] & (UTF16 ? 0xffff : 0xff));
+    if (utf8Accumulated + extra > bufferSize) break;
+    pos++;
+    utf8Accumulated += extra;
+  }
+  if (UTF16 && pos != 0 && pos != length && isSurrogatePair(data[pos - 1], data[pos])) {
+    // We ended on a leading surrogate which has a matching trailing surrogate in the next
+    // position.  In order to make progress when the bufferSize is tiny we try to include it.
+    if (utf8Accumulated < bufferSize) {
+      pos++;  // We had one more byte, so we can include the pair, UTF-8 encoding 3->4.
+    } else {
+      pos--;  // Don't chop the pair in half.
+    }
+  }
+  return pos;
+}
+
 }  // namespace
 
-jsg::BufferSource TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
-  auto str = input.orDefault(js.str());
-  auto view = JSG_REQUIRE_NONNULL(jsg::BufferSource::tryAlloc(js, str.utf8Length(js)), RangeError,
-      "Cannot allocate space for TextEncoder.encode");
-  [[maybe_unused]] auto result = encodeIntoImpl(js, str, view);
-  KJ_DASSERT(result.written == view.size());
-  return kj::mv(view);
+// Test helpers used by encoding-test.c++ to verify findBestFit behavior.
+namespace test {
+
+size_t bestFit(const char* str, size_t bufferSize) {
+  return findBestFit(str, strlen(str), bufferSize);
 }
 
+size_t bestFit(const char16_t* str, size_t bufferSize) {
+  size_t length = 0;
+  while (str[length] != 0) length++;
+  return findBestFit(str, length, bufferSize);
+}
+
+}  // namespace test
+
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer) {
-  auto result = input.writeInto(
-      js, buffer.asArrayPtr<char>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+  if (!workerd::util::Autogate::isEnabled(workerd::util::AutogateKey::ENABLE_FAST_TEXTENCODER)) {
+    auto result = input.writeInto(
+        js, buffer.asArrayPtr<char>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(result.read),
+      .written = static_cast<int>(result.written),
+    };
+  }
+
+  auto outputBuf = buffer.asArrayPtr<char>();
+  size_t bufferSize = outputBuf.size();
+
+  size_t read = 0;
+  size_t written = 0;
+  {
+    // Scope for the view - we can't do anything that might cause a V8 GC!
+    v8::String::ValueView view(js.v8Isolate, input);
+    size_t length = view.length();
+
+    if (view.is_one_byte()) {
+      auto data = reinterpret_cast<const char*>(view.data8());
+      simdutf::result result =
+          simdutf::validate_ascii_with_errors(data, kj::min(length, bufferSize));
+      written = read = result.count;
+      auto outAddr = outputBuf.begin();
+      kj::arrayPtr(outAddr, read).copyFrom(kj::arrayPtr(data, read));
+      outAddr += read;
+      data += read;
+      length -= read;
+      bufferSize -= read;
+      if (length != 0 && bufferSize != 0) {
+        size_t rest = findBestFit(data, length, bufferSize);
+        if (rest != 0) {
+          KJ_DASSERT(simdutf::utf8_length_from_latin1(data, rest) <= bufferSize);
+          written += simdutf::convert_latin1_to_utf8(data, rest, outAddr);
+          read += rest;
+        }
+      }
+    } else {
+      auto data = reinterpret_cast<const char16_t*>(view.data16());
+      read = findBestFit(data, length, bufferSize);
+      if (read != 0) {
+        KJ_DASSERT(
+            simdutf::utf8_length_from_utf16_with_replacement(data, read).count <= bufferSize);
+        simdutf::result result =
+            simdutf::convert_utf16_to_utf8_with_errors(data, read, outputBuf.begin());
+        if (result.error == simdutf::SUCCESS) {
+          written = result.count;
+        } else {
+          // Oh, no, there are unpaired surrogates.  This is hopefully rare.
+          kj::SmallArray<char16_t, MAX_SIZE_FOR_STACK_ALLOC> conversionBuffer(read);
+          simdutf::to_well_formed_utf16(data, read, conversionBuffer.begin());
+          written =
+              simdutf::convert_utf16_to_utf8(conversionBuffer.begin(), read, outputBuf.begin());
+        }
+      }
+    }
+  }
+  KJ_DASSERT(written <= outputBuf.size());
+  // V8's String::kMaxLength is a lot less than a maximal int so this is fine.
+  using RInt = decltype(TextEncoder::EncodeIntoResult::read);
+  using WInt = decltype(TextEncoder::EncodeIntoResult::written);
+  KJ_DASSERT(0 <= read && read <= std::numeric_limits<RInt>::max());
+  KJ_DASSERT(0 <= written && written <= std::numeric_limits<WInt>::max());
   return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(result.read),
-    .written = static_cast<int>(result.written),
+    .read = static_cast<RInt>(read),
+    .written = static_cast<WInt>(written),
   };
 }
 
diff --git a/src/workerd/api/encoding.h b/src/workerd/api/encoding.h
index adaae247e89..5ffc69573e6 100644
--- a/src/workerd/api/encoding.h
+++ b/src/workerd/api/encoding.h
@@ -142,7 +142,7 @@ class TextEncoder final: public jsg::Object {
 
   static jsg::Ref<TextEncoder> constructor(jsg::Lock& js);
 
-  jsg::BufferSource encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input);
+  jsg::JsUint8Array encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input);
 
   EncodeIntoResult encodeInto(jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer);
 
@@ -160,11 +160,7 @@ class TextEncoder final: public jsg::Object {
       JSG_READONLY_INSTANCE_PROPERTY(encoding, getEncoding);
     }
 
-    // `encode()` returns `jsg::BufferSource`, which may be an `ArrayBuffer` or `ArrayBufferView`,
-    // but the implementation uses `jsg::BufferSource::tryAlloc()` which always tries to allocate a
-    // `Uint8Array`. The spec defines that this function returns a `Uint8Array` too.
     JSG_TS_OVERRIDE({
-      encode(input?: string): Uint8Array;
       encodeInto(input: string, buffer: Uint8Array): TextEncoderEncodeIntoResult;
     });
   }
@@ -173,4 +169,11 @@ class TextEncoder final: public jsg::Object {
 #define EW_ENCODING_ISOLATE_TYPES                                                                  \
   api::TextDecoder, api::TextEncoder, api::TextDecoder::ConstructorOptions,                        \
       api::TextDecoder::DecodeOptions, api::TextEncoder::EncodeIntoResult
+
+namespace test {
+
+size_t bestFit(const char* str, size_t bufferSize);
+size_t bestFit(const char16_t* str, size_t bufferSize);
+
+}  // namespace test
 }  // namespace workerd::api
diff --git a/src/workerd/api/streams/encoding.c++ b/src/workerd/api/streams/encoding.c++
index 7fe67ce5e68..ede16250b83 100644
--- a/src/workerd/api/streams/encoding.c++
+++ b/src/workerd/api/streams/encoding.c++
@@ -93,9 +93,7 @@ jsg::Ref<TextEncoderStream> TextEncoderStream::constructor(jsg::Lock& js) {
     if (holder->pending != kj::none) {
       auto backingStore = js.allocBackingStore(3, jsg::Lock::AllocOption::UNINITIALIZED);
       memcpy(backingStore->Data(), REPLACEMENT_UTF8, 3);
-      auto array =
-          v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, 3);
-      controller->enqueue(js, jsg::JsUint8Array(array));
+      controller->enqueue(js, jsg::JsUint8Array::create(js, kj::mv(backingStore), 0, 3));
     }
     return js.resolvedPromise();
   };
diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h
index cb86e82ca67..9d711d4ec82 100644
--- a/src/workerd/jsg/buffersource.h
+++ b/src/workerd/jsg/buffersource.h
@@ -102,9 +102,10 @@ class BackingStore {
 
   // Creates a new BackingStore of the given size.
   template <BufferSourceType T = v8::Uint8Array>
-  static BackingStore alloc(Lock& js, size_t size) {
-    return BackingStore(js.allocBackingStore(size), size, 0, getBufferSourceElementSize<T>(),
-        construct<T>, checkIsIntegerType<T>());
+  static BackingStore alloc(
+      Lock& js, size_t size, Lock::AllocOption init_mode = Lock::AllocOption::ZERO_INITIALIZED) {
+    return BackingStore(js.allocBackingStore(size, init_mode), size, 0,
+        getBufferSourceElementSize<T>(), construct<T>, checkIsIntegerType<T>());
   }
 
   using Disposer = void(void*, size_t, void*);
diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h
index faeb4944fa8..44a09cba60f 100644
--- a/src/workerd/jsg/jsg.h
+++ b/src/workerd/jsg/jsg.h
@@ -2758,6 +2758,14 @@ class Lock {
 
   // Utility method to safely allocate a v8::BackingStore with allocation failure handling.
   // Throws a javascript error if allocation fails.
+  //
+  // IMPORTANT: This method can trigger garbage collection, which may move or invalidate V8
+  // objects. Do NOT call this method while:
+  // - A v8::String::ValueView is alive (it holds internal V8 heap locks)
+  // - You have raw pointers to V8 heap data (e.g., from view.data8(), view.data16())
+  //
+  // Safe pattern: Copy V8 string data to off-heap memory FIRST (e.g., via JsString::writeInto()
+  // into kj::SmallArray), THEN call allocBackingStore(). See TextEncoder::encode() for example.
   std::unique_ptr<v8::BackingStore> allocBackingStore(
       size_t size, AllocOption init_mode = AllocOption::ZERO_INITIALIZED) KJ_WARN_UNUSED_RESULT;
 
diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h
index 61481f4521d..25ee99fe228 100644
--- a/src/workerd/jsg/jsvalue.h
+++ b/src/workerd/jsg/jsvalue.h
@@ -254,6 +254,12 @@ class JsArrayBufferView final: public JsBase<v8::ArrayBufferView, JsArrayBufferV
 
 class JsUint8Array final: public JsBase<v8::Uint8Array, JsUint8Array> {
  public:
+  static JsUint8Array create(
+      Lock& js, std::unique_ptr<v8::BackingStore> backingStore, size_t byteOffset, size_t length) {
+    auto arrayBuffer = v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore));
+    return JsUint8Array(v8::Uint8Array::New(arrayBuffer, byteOffset, length));
+  }
+
   template <typename T = kj::byte>
   kj::ArrayPtr<T> asArrayPtr() {
     v8::Local<v8::Uint8Array> inner = *this;
@@ -277,6 +283,7 @@ class JsString final: public JsBase<v8::String, JsString> {
   int hashCode() const;
 
   bool isFlat() const;
+  bool isOneByte(Lock& js) const KJ_WARN_UNUSED_RESULT;
   bool containsOnlyOneByte() const;
 
   bool operator==(const JsString& other) const;
@@ -304,6 +311,12 @@ class JsString final: public JsBase<v8::String, JsString> {
     // The number of elements (e.g. char, byte, uint16_t) written to the buffer.
     size_t written;
   };
+
+  // Copy string contents into a provided buffer (off-heap memory).
+  //
+  // IMPORTANT: This method does NOT flatten the V8 string or hold V8 heap locks. It safely
+  // copies data out of V8's heap into your buffer. This makes it safe to use before calling
+  // GC-triggering operations like Lock::allocBackingStore().
   WriteIntoStatus writeInto(
       Lock& js, kj::ArrayPtr<char> buffer, WriteFlags options = WriteFlags::NONE) const;
   WriteIntoStatus writeInto(
@@ -986,6 +999,10 @@ inline int JsString::length(jsg::Lock& js) const {
   return inner->Length();
 }
 
+inline bool JsString::isOneByte(jsg::Lock& js) const {
+  return inner->IsOneByte();
+}
+
 inline size_t JsString::utf8Length(jsg::Lock& js) const {
   return inner->Utf8LengthV2(js.v8Isolate);
 }
diff --git a/src/workerd/util/autogate.c++ b/src/workerd/util/autogate.c++
index 4ff178c9170..211e0689374 100644
--- a/src/workerd/util/autogate.c++
+++ b/src/workerd/util/autogate.c++
@@ -33,6 +33,8 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) {
       return "rpc-use-external-pusher"_kj;
     case AutogateKey::BLOB_USE_STREAMS_NEW_MEMORY_SOURCE:
       return "blob-use-streams-new-memory-source"_kj;
+    case AutogateKey::ENABLE_FAST_TEXTENCODER:
+      return "enable-fast-textencoder"_kj;
     case AutogateKey::NumOfKeys:
       KJ_FAIL_ASSERT("NumOfKeys should not be used in getName");
   }
diff --git a/src/workerd/util/autogate.h b/src/workerd/util/autogate.h
index 853a028c517..37b86ccfd19 100644
--- a/src/workerd/util/autogate.h
+++ b/src/workerd/util/autogate.h
@@ -29,6 +29,8 @@ enum class AutogateKey {
   RPC_USE_EXTERNAL_PUSHER,
   // Switch Blob stream() to use streams::newMemorySource instead of Blob::BlobInputStream
   BLOB_USE_STREAMS_NEW_MEMORY_SOURCE,
+  // Enable fast TextEncoder implementation using simdutf
+  ENABLE_FAST_TEXTENCODER,
   NumOfKeys  // Reserved for iteration.
 };