File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -178,23 +178,18 @@ export class TextSplitter {
178178 }
179179
180180 private splitBySpaces ( text : string ) : string [ ] {
181+ // Split text by tokens and return parts
181182 const parts : string [ ] = [ ] ;
182- const words = text . split ( ' ' ) ;
183- if ( words . length > 0 ) {
184- let part = words [ 0 ] ;
185- for ( let i = 1 ; i < words . length ; i ++ ) {
186- const nextWord = words [ i ] ;
187- if ( this . _config . tokenizer . encode ( part + ' ' + nextWord ) . length <= this . _config . chunkSize ) {
188- part += ' ' + nextWord ;
189- } else {
190- parts . push ( part ) ;
191- part = nextWord ;
192- }
183+ let tokens = this . _config . tokenizer . encode ( text ) ;
184+ do {
185+ if ( tokens . length <= this . _config . chunkSize ) {
186+ parts . push ( this . _config . tokenizer . decode ( tokens ) ) ;
187+ break ;
188+ } else {
189+ const span = tokens . splice ( 0 , this . _config . chunkSize ) ;
190+ parts . push ( this . _config . tokenizer . decode ( span ) ) ;
193191 }
194- parts . push ( part ) ;
195- } else {
196- parts . push ( text ) ;
197- }
192+ } while ( true ) ;
198193
199194 return parts ;
200195 }
You can’t perform that action at this time.
0 commit comments