diff --git a/parquet/file/file_reader.go b/parquet/file/file_reader.go index c566ec2a..4025939c 100644 --- a/parquet/file/file_reader.go +++ b/parquet/file/file_reader.go @@ -322,6 +322,11 @@ func (f *Reader) RowGroup(i int) *RowGroupReader { fileDecryptor: f.fileDecryptor, bufferPool: &f.bufferPool, pageIndexReader: f.pageIndexReader, + // don't pre-emptively initialize the row group page index reader + // do it on demand, but ensure that it is goroutine safe. + rgPageIndexReader: sync.OnceValues(func() (*metadata.RowGroupPageIndexReader, error) { + return f.pageIndexReader.RowGroup(i) + }), } } diff --git a/parquet/file/row_group_reader.go b/parquet/file/row_group_reader.go index acfac0ea..ea5f7098 100644 --- a/parquet/file/row_group_reader.go +++ b/parquet/file/row_group_reader.go @@ -42,7 +42,7 @@ type RowGroupReader struct { fileDecryptor encryption.FileDecryptor pageIndexReader *metadata.PageIndexReader - rgPageIndexReader *metadata.RowGroupPageIndexReader + rgPageIndexReader func() (*metadata.RowGroupPageIndexReader, error) bufferPool *sync.Pool } @@ -86,12 +86,9 @@ func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { return nil, err } - if r.rgPageIndexReader == nil { - rgIdx, err := r.pageIndexReader.RowGroup(int(r.rgMetadata.Ordinal())) - if err != nil { - return nil, err - } - r.rgPageIndexReader = rgIdx + rgIdxRdr, err := r.rgPageIndexReader() + if err != nil { + return nil, err } colStart := col.DataPageOffset() @@ -128,7 +125,7 @@ func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { r: stream, chunk: col, colIdx: i, - pgIndexReader: r.rgPageIndexReader, + pgIndexReader: rgIdxRdr, maxPageHeaderSize: defaultMaxPageHeaderSize, nrows: col.NumValues(), mem: r.props.Allocator(), @@ -157,7 +154,7 @@ func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { r: stream, chunk: col, colIdx: i, - pgIndexReader: r.rgPageIndexReader, + pgIndexReader: rgIdxRdr, maxPageHeaderSize: defaultMaxPageHeaderSize, nrows: col.NumValues(), mem: r.props.Allocator(), @@ -181,7 +178,7 @@ func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { r: stream, chunk: col, colIdx: i, - pgIndexReader: r.rgPageIndexReader, + pgIndexReader: rgIdxRdr, maxPageHeaderSize: defaultMaxPageHeaderSize, nrows: col.NumValues(), mem: r.props.Allocator(),