Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,14 @@ import org.jetbrains.kotlinx.dataframe.api.NullabilityException
import org.jetbrains.kotlinx.dataframe.api.NullabilityOptions
import org.jetbrains.kotlinx.dataframe.api.applyNullability
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.count
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
import org.jetbrains.kotlinx.dataframe.api.getColumn
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.api.toDataFrameFromPairs
import org.jetbrains.kotlinx.dataframe.impl.asList
import java.io.File
import java.math.BigDecimal
Expand All @@ -89,13 +93,16 @@ internal fun <T> Iterable<DataFrame<T>>.concatKeepingSchema(): DataFrame<T> {
1 -> return dataFrames[0]
}

val columnNames = dataFrames.first().columnNames()
val columnPaths = dataFrames.first()
.getColumnsWithPaths { colsAtAnyDepth().filter { !it.isColumnGroup() } }
.map { it.path }

val columns = columnNames.map { name ->
val values = dataFrames.flatMap { it.getColumn(name).values() }
DataColumn.createValueColumn(name, values, dataFrames.first().getColumn(name).type())
val totalRows = dataFrames.sumOf { it.count() }
val columns = columnPaths.map { path ->
val values = dataFrames.flatMapTo(ArrayList(totalRows)) { it.getColumn(path).values() }
path to DataColumn.createValueColumn(path.name(), values, dataFrames.first().getColumn(path).type())
}
return dataFrameOf(columns).cast()
return columns.toDataFrameFromPairs()
}

/**
 * Collects the entries of this vector at every index in [range], in the range's iteration order.
 *
 * Each element is whatever `getObject` returns for that index.
 * NOTE(review): presumably `null` marks an unset slot — confirm against the Arrow `BitVector` docs.
 */
private fun BitVector.values(range: IntRange): List<Boolean?> {
    val bits = ArrayList<Boolean?>()
    for (index in range) {
        bits.add(getObject(index))
    }
    return bits
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.jetbrains.kotlinx.dataframe.io

import io.kotest.assertions.asClue
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.collections.shouldContain
import io.kotest.matchers.shouldBe
import io.kotest.matchers.types.shouldBeInstanceOf
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import kotlinx.datetime.UtcOffset
Expand Down Expand Up @@ -37,7 +39,9 @@ import org.jetbrains.kotlinx.dataframe.api.convertToBoolean
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.pathOf
import org.jetbrains.kotlinx.dataframe.api.print
import org.jetbrains.kotlinx.dataframe.api.remove
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
import org.junit.Assert
Expand Down Expand Up @@ -77,6 +81,18 @@ internal class ArrowKtTest {
df shouldBe expected
}

@Test
fun testReadingMultipleBatches() {
    // Load the "multiple_batches_concat" Feather fixture and check the column types of the result:
    // a top-level Int "id" plus a "person" column group holding "name" (String) and "age" (Int).
    val frame = DataFrame.readArrowFeather(testArrowFeather("multiple_batches_concat"))
    val frameSchema = frame.schema()
    frameSchema.print()
    // The schema is attached as a clue so that a failing assertion reports it alongside the mismatch.
    frameSchema.asClue {
        frame["id"].type() shouldBe typeOf<Int>()
        val personGroup = frame["person"].shouldBeInstanceOf<ColumnGroup<*>>()
        personGroup["name"].type() shouldBe typeOf<String>()
        personGroup["age"].type() shouldBe typeOf<Int>()
    }
}

@Test
fun testReadingAllTypesAsEstimated() {
assertEstimations(
Expand Down
Binary file not shown.
Loading