Skip to content

Commit e160cd6

Browse files
committed
Made logic to split by spaces faster
1 parent 63d6919 commit e160cd6

1 file changed

Lines changed: 10 additions & 15 deletions

File tree

src/TextSplitter.ts

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -178,23 +178,18 @@ export class TextSplitter {
178178
}
179179

180180
private splitBySpaces(text: string): string[] {
181+
// Split text by tokens and return parts
181182
const parts: string[] = [];
182-
const words = text.split(' ');
183-
if (words.length > 0) {
184-
let part = words[0];
185-
for (let i = 1; i < words.length; i++) {
186-
const nextWord = words[i];
187-
if (this._config.tokenizer.encode(part + ' ' + nextWord).length <= this._config.chunkSize) {
188-
part += ' ' + nextWord;
189-
} else {
190-
parts.push(part);
191-
part = nextWord;
192-
}
183+
let tokens = this._config.tokenizer.encode(text);
184+
do {
185+
if (tokens.length <= this._config.chunkSize) {
186+
parts.push(this._config.tokenizer.decode(tokens));
187+
break;
188+
} else {
189+
const span = tokens.splice(0, this._config.chunkSize);
190+
parts.push(this._config.tokenizer.decode(span));
193191
}
194-
parts.push(part);
195-
} else {
196-
parts.push(text);
197-
}
192+
} while (true);
198193

199194
return parts;
200195
}

0 commit comments

Comments (0)