@@ -30,7 +30,7 @@ use datafusion::{
3030} ;
3131use datafusion_ext_commons:: {
3232 assume,
33- io:: { read_bytes_into_vec , read_bytes_slice, read_len, read_scalar, write_len, write_scalar} ,
33+ io:: { read_bytes_slice, read_len, read_scalar, write_len, write_scalar} ,
3434 unchecked,
3535} ;
3636use smallvec:: SmallVec ;
@@ -50,7 +50,7 @@ pub trait AccColumn: Send {
5050 fn num_records ( & self ) -> usize ;
5151 fn mem_used ( & self ) -> usize ;
5252 fn freeze_to_rows ( & self , idx : IdxSelection < ' _ > , array : & mut [ Vec < u8 > ] ) -> Result < ( ) > ;
53- fn unfreeze_from_rows ( & mut self , array : & [ & [ u8 ] ] , offsets : & mut [ usize ] ) -> Result < ( ) > ;
53+ fn unfreeze_from_rows ( & mut self , cursors : & mut [ Cursor < & [ u8 ] > ] ) -> Result < ( ) > ;
5454 fn spill ( & self , idx : IdxSelection < ' _ > , w : & mut SpillCompressedWriter ) -> Result < ( ) > ;
5555 fn unspill ( & mut self , num_rows : usize , r : & mut SpillCompressedReader ) -> Result < ( ) > ;
5656
@@ -442,7 +442,6 @@ impl AccColumn for AccGenericColumn {
442442 raw. set_len ( new_len) ;
443443 } else {
444444 raw. truncate ( new_len) ;
445- raw. set_len ( new_len) ;
446445 }
447446 }
448447 valids. resize ( len, false ) ;
@@ -559,44 +558,38 @@ impl AccColumn for AccGenericColumn {
559558 Ok ( ( ) )
560559 }
561560
562- fn unfreeze_from_rows ( & mut self , array : & [ & [ u8 ] ] , offsets : & mut [ usize ] ) -> Result < ( ) > {
563- let mut idx = self . num_records ( ) ;
564- self . resize ( idx + array . len ( ) ) ;
561+ fn unfreeze_from_rows ( & mut self , cursors : & mut [ Cursor < & [ u8 ] > ] ) -> Result < ( ) > {
562+ assert_eq ! ( self . num_records( ) , 0 , "expect empty AccColumn" ) ;
563+ self . resize ( cursors . len ( ) ) ;
565564
566565 match self {
567566 & mut AccGenericColumn :: Prim {
568567 ref mut raw,
569568 ref mut valids,
570569 prim_size,
571570 } => {
572- for ( data, offset) in array. iter ( ) . zip ( offsets) {
573- let mut r = Cursor :: new ( data) ;
574- r. set_position ( * offset as u64 ) ;
575-
576- let valid = r. read_u8 ( ) ?;
571+ for ( idx, cursor) in cursors. iter_mut ( ) . enumerate ( ) {
572+ let valid = cursor. read_u8 ( ) ?;
577573 if valid == 1 {
578- r. read_exact ( & mut raw. as_raw_bytes_mut ( ) [ prim_size * idx..] [ ..prim_size] ) ?;
574+ cursor. read_exact (
575+ & mut raw. as_raw_bytes_mut ( ) [ prim_size * idx..] [ ..prim_size] ,
576+ ) ?;
579577 valids. set ( idx, true ) ;
580578 } else {
581579 valids. set ( idx, false ) ;
582580 }
583- * offset = r. position ( ) as usize ;
584- idx += 1 ;
585581 }
586582 }
587583 AccGenericColumn :: Bytes {
588584 items,
589585 heap_mem_used,
590586 } => {
591- for ( data, offset) in array. iter ( ) . zip ( offsets) {
592- let mut r = Cursor :: new ( data) ;
593- r. set_position ( * offset as u64 ) ;
594-
595- let len = read_len ( & mut r) ?;
587+ for ( idx, cursor) in cursors. iter_mut ( ) . enumerate ( ) {
588+ let len = read_len ( cursor) ?;
596589 if len > 0 {
597590 let len = len - 1 ;
598591 let bytes = AccBytes :: from_vec ( {
599- let vec: Vec < u8 > = read_bytes_slice ( & mut r , len) ?. into ( ) ;
592+ let vec: Vec < u8 > = read_bytes_slice ( cursor , len) ?. into ( ) ;
600593 vec
601594 } ) ;
602595 if bytes. spilled ( ) {
@@ -606,23 +599,16 @@ impl AccColumn for AccGenericColumn {
606599 } else {
607600 items[ idx] = None ;
608601 }
609- * offset = r. position ( ) as usize ;
610- idx += 1 ;
611602 }
612603 }
613604 AccGenericColumn :: Scalar {
614605 items,
615606 dt,
616607 heap_mem_used,
617608 } => {
618- for ( data, offset) in array. iter ( ) . zip ( offsets) {
619- let mut r = Cursor :: new ( data) ;
620- r. set_position ( * offset as u64 ) ;
621-
622- items[ idx] = read_scalar ( & mut r, dt, true ) ?;
609+ for ( idx, cursor) in cursors. iter_mut ( ) . enumerate ( ) {
610+ items[ idx] = read_scalar ( cursor, dt, true ) ?;
623611 * heap_mem_used += items[ idx] . size ( ) - size_of :: < ScalarValue > ( ) ;
624- * offset = r. position ( ) as usize ;
625- idx += 1 ;
626612 }
627613 }
628614 }
@@ -678,23 +664,20 @@ impl AccColumn for AccGenericColumn {
678664 }
679665
680666 fn unspill ( & mut self , num_rows : usize , r : & mut SpillCompressedReader ) -> Result < ( ) > {
681- let idx = self . num_records ( ) ;
682- self . resize ( idx + num_rows) ;
667+ assert_eq ! ( self . num_records( ) , 0 , "expect empty AccColumn" ) ;
668+ self . resize ( num_rows) ;
683669
684670 match self {
685671 & mut AccGenericColumn :: Prim {
686672 ref mut raw,
687673 ref mut valids,
688674 prim_size,
689675 } => {
690- let mut valid_buf = vec ! [ ] ;
691- let valid_len = ( num_rows + 7 ) / 8 ;
692- read_bytes_into_vec ( r, & mut valid_buf, valid_len) ?;
693- let unfreezed_valids = BitVec :: < u8 > :: from_vec ( valid_buf) ;
694- valids. truncate ( idx) ;
695- valids. extend_from_bitslice ( unfreezed_valids. as_bitslice ( ) ) ;
696-
697- for i in idx..idx + num_rows {
676+ let mut bits: BitVec < u8 > = BitVec :: repeat ( false , num_rows) ;
677+ r. read_exact ( bits. as_raw_mut_slice ( ) ) ?;
678+ valids. clear ( ) ;
679+ valids. extend_from_bitslice ( bits. as_bitslice ( ) ) ;
680+ for i in 0 ..num_rows {
698681 if valids[ i] {
699682 r. read_exact ( & mut raw. as_raw_bytes_mut ( ) [ prim_size * i..] [ ..prim_size] ) ?;
700683 }
@@ -704,7 +687,7 @@ impl AccColumn for AccGenericColumn {
704687 items,
705688 heap_mem_used,
706689 } => {
707- for i in idx..idx + num_rows {
690+ for i in 0 .. num_rows {
708691 let len = read_len ( r) ?;
709692 if len > 0 {
710693 let len = len - 1 ;
@@ -721,7 +704,7 @@ impl AccColumn for AccGenericColumn {
721704 dt,
722705 heap_mem_used,
723706 } => {
724- for i in idx..idx + num_rows {
707+ for i in 0 .. num_rows {
725708 items[ i] = read_scalar ( r, dt, true ) ?;
726709 * heap_mem_used += items[ i] . size ( ) - size_of :: < ScalarValue > ( ) ;
727710 }
0 commit comments