Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sort df.compileTimeSchema() columns according to df.schema() so they're easier to compare #990

Merged
merged 1 commit into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -10130,6 +10130,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/MapKt {
public static final fun mapNotNullValues (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
public static final fun compileTimeSchemaImpl (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lkotlin/reflect/KClass;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.impl.api.compileTimeSchemaImpl
import org.jetbrains.kotlinx.dataframe.impl.owner
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema

// region DataRow
Expand All @@ -26,5 +26,5 @@ public fun GroupBy<*, *>.schema(): DataFrameSchema = toDataFrame().schema()

// endregion

@Suppress("UnusedReceiverParameter")
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema = getSchema(T::class)
/**
 * Returns the schema described by the compile-time type [T] of this [DataFrame].
 *
 * The work is delegated to [compileTimeSchemaImpl], which receives both this frame's runtime
 * [schema] and `T::class`, so that the resulting columns can be ordered the same way as the
 * runtime schema and the two are easier to compare.
 */
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema =
compileTimeSchemaImpl(schema(), T::class)
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.jetbrains.kotlinx.dataframe.impl.api

import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import kotlin.reflect.KClass

/**
 * Builds the schema for [klass] via [getSchema] and reorders its columns to follow [runtimeSchema].
 *
 * Marked [PublishedApi] so it can be called from the public inline `compileTimeSchema` function.
 *
 * @param runtimeSchema schema whose column order (at every nesting level) is used as the reference.
 * @param klass class the compile-time schema is obtained from.
 * @return the schema of [klass] with columns sorted by their position in [runtimeSchema].
 */
@PublishedApi
internal fun compileTimeSchemaImpl(runtimeSchema: DataFrameSchema, klass: KClass<*>): DataFrameSchema {
    val declaredSchema = getSchema(klass)
    // The empty path is the common prefix for all top-level columns of both schemas.
    val rootPath = ColumnPath(emptyList())
    val runtimePositions: Map<ColumnPath, Int> = buildMap {
        putColumnsOrder(runtimeSchema, path = rootPath)
    }
    return declaredSchema.sortedBy(runtimePositions, path = rootPath)
}

/**
 * Stores, for every column of [schema], its index among its siblings, keyed by the column's full path.
 *
 * The full path is [path] extended with the column name. Columns that are [ColumnSchema.Frame] or
 * [ColumnSchema.Group] are descended into recursively, so nested columns get an entry as well
 * (their index is relative to their own parent, not global).
 *
 * @param schema schema whose column positions are recorded.
 * @param path path of the column that owns [schema]; the empty path for the top level.
 */
internal fun MutableMap<ColumnPath, Int>.putColumnsOrder(schema: DataFrameSchema, path: ColumnPath) {
    for ((position, entry) in schema.columns.entries.withIndex()) {
        val columnPath = path + entry.key
        put(columnPath, position)

        // Only frame and group columns carry a nested schema to walk into.
        val nestedSchema = when (val column = entry.value) {
            is ColumnSchema.Frame -> column.schema
            is ColumnSchema.Group -> column.schema
            else -> null
        }
        if (nestedSchema != null) {
            putColumnsOrder(nestedSchema, columnPath)
        }
    }
}

/**
 * Returns a new schema with the same columns as this one, reordered according to [order].
 *
 * Each column is looked up in [order] by its full path ([path] extended with the column name) and
 * columns are sorted by the index found there. Nested schemas of [ColumnSchema.Frame] and
 * [ColumnSchema.Group] columns are sorted recursively in the same way; [ColumnSchema.Value]
 * columns are kept as they are.
 *
 * Columns that have no entry in [order] get a `null` sort key. The standard library's `sortedBy`
 * treats `null` as smaller than any value and is stable, so such columns end up first, in their
 * original relative order.
 *
 * @param order index of each column among its siblings, keyed by full column path.
 * @param path path of the column that owns this schema; the empty path for the top level.
 * @return the reordered schema.
 * @throws IllegalStateException if a column is of an unknown [ColumnSchema] subclass.
 */
internal fun DataFrameSchema.sortedBy(order: Map<ColumnPath, Int>, path: ColumnPath): DataFrameSchema {
    val reordered = columns
        .map { (name, column) ->
            // Built once: it is both the prefix for nested columns and this column's lookup key.
            val columnPath = path + name
            val sortedColumn = when (column) {
                is ColumnSchema.Frame -> ColumnSchema.Frame(
                    column.schema.sortedBy(order, columnPath),
                    column.nullable,
                    column.contentType,
                )

                is ColumnSchema.Group -> ColumnSchema.Group(
                    column.schema.sortedBy(order, columnPath),
                    column.contentType,
                )

                is ColumnSchema.Value -> column

                // Not unfinished work (so not TODO): reaching this branch means an unknown subclass.
                else -> error("unexpected ColumnSchema class ${column::class}")
            }
            Triple(name, sortedColumn, order[columnPath])
        }
        .sortedBy { (_, _, position) -> position }
        .associate { (name, column, _) -> name to column }
    return DataFrameSchemaImpl(reordered)
}
26 changes: 26 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataRow
import org.junit.Test

class SchemaTests {
    /**
     * Builds a data frame whose columns (top-level and nested) are in a different order than the
     * properties of [Schema] / [Nested], then checks that the string form of the compile-time
     * schema equals the string form of the runtime schema.
     */
    @Test
    fun `columns order test`() {
        val nestedRow = dataFrameOf("c", "b")(4, 5).first()
        val typedFrame = dataFrameOf("abc", "a", "a123", "nested")(1, 2, 3, nestedRow).cast<Schema>()

        // Compared as text; presumably because the rendered form reflects column order.
        val runtimeText = typedFrame.schema().toString()
        val compileTimeText = typedFrame.compileTimeSchema().toString()

        runtimeText shouldBe compileTimeText
    }
}

/**
 * Compile-time schema used by `columns order test`.
 *
 * The properties are intentionally declared in a different order (a, abc, a123, nested) than the
 * columns of the data frame built in the test (abc, a, a123, nested) — do not reorder them.
 */
private interface Schema {
val a: Int
val abc: Int
val a123: Int
val nested: DataRow<Nested>
}

/**
 * Compile-time schema of the `nested` column used by `columns order test`.
 *
 * Declared as b, c while the row built in the test has its columns as c, b — do not reorder them.
 */
private interface Nested {
val b: Int
val c: Int
}