3535import org .apache .lucene .index .DocValues ;
3636import org .apache .lucene .index .IndexReader ;
3737import org .apache .lucene .index .LeafReaderContext ;
38+ import org .apache .lucene .index .NumericDocValues ;
3839import org .apache .lucene .index .SortedDocValues ;
3940import org .apache .lucene .index .SortedSetDocValues ;
41+ import org .apache .lucene .index .Terms ;
42+ import org .apache .lucene .index .TermsEnum ;
43+ import org .apache .lucene .search .CollectionTerminatedException ;
44+ import org .apache .lucene .search .Weight ;
4045import org .apache .lucene .util .ArrayUtil ;
4146import org .apache .lucene .util .BytesRef ;
4247import org .apache .lucene .util .PriorityQueue ;
4651import org .opensearch .common .util .LongHash ;
4752import org .opensearch .core .common .io .stream .StreamOutput ;
4853import org .opensearch .core .xcontent .XContentBuilder ;
54+ import org .opensearch .index .mapper .DocCountFieldMapper ;
4955import org .opensearch .search .DocValueFormat ;
5056import org .opensearch .search .aggregations .AggregationExecutionException ;
5157import org .opensearch .search .aggregations .Aggregator ;
7379
7480import static org .opensearch .search .aggregations .InternalOrder .isKeyOrder ;
7581import static org .apache .lucene .index .SortedSetDocValues .NO_MORE_ORDS ;
82+ import static org .apache .lucene .search .DocIdSetIterator .NO_MORE_DOCS ;
7683
7784/**
7885 * An aggregator of string values that relies on global ordinals in order to build buckets.
@@ -85,6 +92,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
8592
8693 private final LongPredicate acceptedGlobalOrdinals ;
8794 private final long valueCount ;
95+ private final String fieldName ;
96+ private Weight weight ;
8897 private final GlobalOrdLookupFunction lookupGlobalOrd ;
8998 protected final CollectionStrategy collectionStrategy ;
9099 protected int segmentsWithSingleValuedOrds = 0 ;
@@ -136,16 +145,105 @@ public GlobalOrdinalsStringTermsAggregator(
136145 return new DenseGlobalOrds ();
137146 });
138147 }
148+ this .fieldName = (valuesSource instanceof ValuesSource .Bytes .WithOrdinals .FieldData )
149+ ? ((ValuesSource .Bytes .WithOrdinals .FieldData ) valuesSource ).getIndexFieldName ()
150+ : null ;
139151 }
140152
141153 String descriptCollectionStrategy () {
142154 return collectionStrategy .describe ();
143155 }
144156
157+ public void setWeight (Weight weight ) {
158+ this .weight = weight ;
159+ }
160+
161+ /**
162+ Read doc frequencies directly from indexed terms in the segment to skip iterating through individual documents
163+ @param ctx The LeafReaderContext to collect terms from
164+ @param globalOrds The SortedSetDocValues for the field's ordinals
165+ @param ordCountConsumer A consumer to accept collected term frequencies
166+ @return A LeafBucketCollector implementation with collection termination, since collection is complete
167+ @throws IOException If an I/O error occurs during reading
168+ */
169+ LeafBucketCollector termDocFreqCollector (
170+ LeafReaderContext ctx ,
171+ SortedSetDocValues globalOrds ,
172+ BiConsumer <Long , Integer > ordCountConsumer
173+ ) throws IOException {
174+ if (weight == null ) {
175+ // Weight not assigned - cannot use this optimization
176+ return null ;
177+ } else {
178+ if (weight .count (ctx ) == 0 ) {
179+ // No documents matches top level query on this segment, we can skip the segment entirely
180+ return LeafBucketCollector .NO_OP_COLLECTOR ;
181+ } else if (weight .count (ctx ) != ctx .reader ().maxDoc ()) {
182+ // weight.count(ctx) == ctx.reader().maxDoc() implies there are no deleted documents and
183+ // top-level query matches all docs in the segment
184+ return null ;
185+ }
186+ }
187+
188+ Terms segmentTerms = ctx .reader ().terms (this .fieldName );
189+ if (segmentTerms == null ) {
190+ // Field is not indexed.
191+ return null ;
192+ }
193+
194+ NumericDocValues docCountValues = DocValues .getNumeric (ctx .reader (), DocCountFieldMapper .NAME );
195+ if (docCountValues .nextDoc () != NO_MORE_DOCS ) {
196+ // This segment has at least one document with the _doc_count field.
197+ return null ;
198+ }
199+
200+ TermsEnum indexTermsEnum = segmentTerms .iterator ();
201+ BytesRef indexTerm = indexTermsEnum .next ();
202+ TermsEnum globalOrdinalTermsEnum = globalOrds .termsEnum ();
203+ BytesRef ordinalTerm = globalOrdinalTermsEnum .next ();
204+
205+ // Iterate over the terms in the segment, look for matches in the global ordinal terms,
206+ // and increment bucket count when segment terms match global ordinal terms.
207+ while (indexTerm != null && ordinalTerm != null ) {
208+ int compare = indexTerm .compareTo (ordinalTerm );
209+ if (compare == 0 ) {
210+ if (acceptedGlobalOrdinals .test (globalOrdinalTermsEnum .ord ())) {
211+ ordCountConsumer .accept (globalOrdinalTermsEnum .ord (), indexTermsEnum .docFreq ());
212+ }
213+ indexTerm = indexTermsEnum .next ();
214+ ordinalTerm = globalOrdinalTermsEnum .next ();
215+ } else if (compare < 0 ) {
216+ indexTerm = indexTermsEnum .next ();
217+ } else {
218+ ordinalTerm = globalOrdinalTermsEnum .next ();
219+ }
220+ }
221+ return new LeafBucketCollector () {
222+ @ Override
223+ public void collect (int doc , long owningBucketOrd ) throws IOException {
224+ throw new CollectionTerminatedException ();
225+ }
226+ };
227+ }
228+
145229 @ Override
146230 public LeafBucketCollector getLeafCollector (LeafReaderContext ctx , LeafBucketCollector sub ) throws IOException {
147231 SortedSetDocValues globalOrds = valuesSource .globalOrdinalsValues (ctx );
148232 collectionStrategy .globalOrdsReady (globalOrds );
233+
234+ if (collectionStrategy instanceof DenseGlobalOrds
235+ && this .resultStrategy instanceof StandardTermsResults
236+ && sub == LeafBucketCollector .NO_OP_COLLECTOR ) {
237+ LeafBucketCollector termDocFreqCollector = termDocFreqCollector (
238+ ctx ,
239+ globalOrds ,
240+ (ord , docCount ) -> incrementBucketDocCount (collectionStrategy .globalOrdToBucketOrd (0 , ord ), docCount )
241+ );
242+ if (termDocFreqCollector != null ) {
243+ return termDocFreqCollector ;
244+ }
245+ }
246+
149247 SortedDocValues singleValues = DocValues .unwrapSingleton (globalOrds );
150248 if (singleValues != null ) {
151249 segmentsWithSingleValuedOrds ++;
@@ -343,9 +441,20 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol
343441 final SortedSetDocValues segmentOrds = valuesSource .ordinalsValues (ctx );
344442 segmentDocCounts = context .bigArrays ().grow (segmentDocCounts , 1 + segmentOrds .getValueCount ());
345443 assert sub == LeafBucketCollector .NO_OP_COLLECTOR ;
346- final SortedDocValues singleValues = DocValues .unwrapSingleton (segmentOrds );
347444 mapping = valuesSource .globalOrdinalsMapping (ctx );
348- // Dense mode doesn't support include/exclude so we don't have to check it here.
445+
446+ if (this .resultStrategy instanceof StandardTermsResults ) {
447+ LeafBucketCollector termDocFreqCollector = this .termDocFreqCollector (
448+ ctx ,
449+ segmentOrds ,
450+ (ord , docCount ) -> incrementBucketDocCount (mapping .applyAsLong (ord ), docCount )
451+ );
452+ if (termDocFreqCollector != null ) {
453+ return termDocFreqCollector ;
454+ }
455+ }
456+
457+ final SortedDocValues singleValues = DocValues .unwrapSingleton (segmentOrds );
349458 if (singleValues != null ) {
350459 segmentsWithSingleValuedOrds ++;
351460 return resultStrategy .wrapCollector (new LeafBucketCollectorBase (sub , segmentOrds ) {
0 commit comments