From ed4e7e3f720bf7c81bc16c7853d3969bd03f6ed0 Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 10:34:33 +0200 Subject: [PATCH 1/7] fix: Helpers for `org.apache.arrow.vector` types --- .../io/cloudquery/helper/ArrowHelper.java | 70 ++++++++++--------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java index 7c99307..086b5fa 100644 --- a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java +++ b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java @@ -1,7 +1,5 @@ package io.cloudquery.helper; -import static java.util.Arrays.asList; - import com.google.protobuf.ByteString; import io.cloudquery.scalar.ValidationException; import io.cloudquery.schema.Column; @@ -10,36 +8,9 @@ import io.cloudquery.schema.Table.TableBuilder; import io.cloudquery.types.JSONType.JSONVector; import io.cloudquery.types.UUIDType.UUIDVector; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.*; import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.arrow.vector.ipc.ArrowStreamReader; import org.apache.arrow.vector.ipc.ArrowStreamWriter; @@ -48,6 +19,13 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Text; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.channels.Channels; +import java.util.*; + +import static java.util.Arrays.asList; + public class ArrowHelper { public static final String CQ_EXTENSION_INCREMENTAL = "cq:extension:incremental"; public static final String CQ_EXTENSION_CONSTRAINT_NAME = "cq:extension:constraint_name"; @@ -72,6 +50,18 @@ private static void setVectorData(FieldVector vector, Object data) { bitVector.set(0, (boolean) data ? 1 : 0); return; } + if (vector instanceof DateDayVector dayDateVector) { + dayDateVector.set(0, (int) data); + return; + } + if (vector instanceof DateMilliVector dateMilliVector) { + dateMilliVector.set(0, (long) data); + return; + } + if (vector instanceof DurationVector durationVector) { + durationVector.set(0, (long) data); + return; + } if (vector instanceof FixedSizeBinaryVector fixedSizeBinaryVector) { fixedSizeBinaryVector.set(0, (byte[]) data); return; @@ -100,6 +90,22 @@ private static void setVectorData(FieldVector vector, Object data) { smallIntVector.set(0, (short) data); return; } + if (vector instanceof TimeMicroVector timeMicroVector) { + timeMicroVector.set(0, (long) data); + return; + } + if (vector instanceof TimeMilliVector timeMilliVector) { + timeMilliVector.set(0, (int) data); + return; + } + if (vector instanceof TimeNanoVector timeNanoVector) { + timeNanoVector.set(0, (long) data); + return; + } + if (vector instanceof TimeSecVector timeSecVector) { + timeSecVector.set(0, (int) data); + return; + } if (vector instanceof TimeStampVector timeStampVector) { timeStampVector.set(0, (long) data); return; @@ -140,10 +146,6 @@ private static void setVectorData(FieldVector vector, Object data) { jsonVector.setSafe(0, (byte[]) data); return; } - if (vector instanceof DateDayVector dayDateVector) { - dayDateVector.set(0, (int) data); - return; - } throw new IllegalArgumentException("Unsupported vector type: " + vector.getClass()); } From 76d67dbc33d1fb71c3e2174e1a883321dc5b673c Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 11:12:14 +0200 Subject: [PATCH 2/7] add uuidtype.instance --- lib/src/main/java/io/cloudquery/types/UUIDType.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/src/main/java/io/cloudquery/types/UUIDType.java b/lib/src/main/java/io/cloudquery/types/UUIDType.java index cdc6b69..e36ee39 100644 --- a/lib/src/main/java/io/cloudquery/types/UUIDType.java +++ b/lib/src/main/java/io/cloudquery/types/UUIDType.java @@ -12,6 +12,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; public class UUIDType extends ExtensionType { + public static final UUIDType INSTANCE = new UUIDType(); public static final int BYTE_WIDTH = 16; public static final String EXTENSION_NAME = "uuid"; From ef4e7248163c4e51434c35e82c4fccd8a4d40f16 Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 11:12:27 +0200 Subject: [PATCH 3/7] add decimal helpers --- .../java/io/cloudquery/helper/ArrowHelper.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java index 086b5fa..fd4f593 100644 --- a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java +++ b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java @@ -58,6 +58,14 @@ private static void setVectorData(FieldVector vector, Object data) { dateMilliVector.set(0, (long) data); return; } + if (vector instanceof Decimal256Vector decimal256Vector) { + decimal256Vector.set(0, (java.math.BigDecimal) data); + return; + } + if (vector instanceof DecimalVector decimalVector) { + decimalVector.set(0, (java.math.BigDecimal) data); + return; + } if (vector instanceof DurationVector durationVector) { durationVector.set(0, (long) data); return; @@ -138,14 +146,16 @@ private static void setVectorData(FieldVector vector, Object data) { vectorCharVector.set(0, (Text) data); return; } - if (vector instanceof UUIDVector uuidVector) { - uuidVector.set(0, (java.util.UUID) data); - return; - } + // CloudQuery-specific if (vector instanceof JSONVector jsonVector) { jsonVector.setSafe(0, (byte[]) data); return; } + // CloudQuery-specific + if (vector instanceof UUIDVector uuidVector) { + uuidVector.set(0, (java.util.UUID) data); + return; + } throw new IllegalArgumentException("Unsupported vector type: " + vector.getClass()); } From 572294524fb7e22ddcccabff59b07918f6ee72aa Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 11:12:42 +0200 Subject: [PATCH 4/7] tests init --- .../transformers/TypeTransformer.java | 2 +- .../io/cloudquery/helper/ArrowHelperTest.java | 58 ++++++++++++++++--- .../transformers/TransformWithClassTest.java | 11 ++-- .../transformers/TypeTransformerTest.java | 11 +++- 4 files changed, 68 insertions(+), 14 deletions(-) diff --git a/lib/src/main/java/io/cloudquery/transformers/TypeTransformer.java b/lib/src/main/java/io/cloudquery/transformers/TypeTransformer.java index 83f1076..4157f71 100644 --- a/lib/src/main/java/io/cloudquery/transformers/TypeTransformer.java +++ b/lib/src/main/java/io/cloudquery/transformers/TypeTransformer.java @@ -39,7 +39,7 @@ private static ArrowType transformArrowType(String name, Class type) return Timestamp.dt; } case "java.util.UUID" -> { - return new UUIDType(); + return UUIDType.INSTANCE; } default -> { if (type.isArray()) { diff --git a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java index 4a63eb3..580d589 100644 --- a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java +++ b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java @@ -16,9 +16,18 @@ import io.cloudquery.schema.Table; import java.io.IOException; import java.time.LocalDate; +import java.time.ZoneId; +import java.time.ZoneOffset; import java.util.List; import java.util.Map; +import java.util.SimpleTimeZone; +import java.util.TimeZone; + +import io.cloudquery.types.JSONType; +import io.cloudquery.types.UUIDType; import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -36,19 +45,52 @@ public class ArrowHelperTest { .columns( List.of( Column.builder() - .name("string_column1") + .name("pk") .type(ArrowType.Utf8.INSTANCE) .unique(true) .incrementalKey(true) .primaryKey(true) .build(), - Column.builder().name("string_column2").type(ArrowType.Utf8.INSTANCE).build(), - Column.builder().name("boolean_column").type(ArrowType.Bool.INSTANCE).build(), - Column.builder() - .name("date_days_column") - .type(new ArrowType.Date(DateUnit.DAY)) - .build())) - .build(); + Column.builder().name("big_int").type(Types.MinorType.BIGINT.getType()).build(), + Column.builder().name("bit").type(Types.MinorType.BIT.getType()).build(), + Column.builder().name("date_day").type(Types.MinorType.DATEDAY.getType()).build(), + Column.builder().name("date_milli").type(Types.MinorType.DATEMILLI.getType()).build(), + Column.builder().name("decimal256").type(ArrowType.Decimal.createDecimal(10,20,256)).build(), + Column.builder().name("decimal128").type(ArrowType.Decimal.createDecimal(10,20,128)).build(), + Column.builder().name("duration_s").type(new ArrowType.Duration(TimeUnit.SECOND)).build(), + Column.builder().name("duration_ms").type(new ArrowType.Duration(TimeUnit.MILLISECOND)).build(), + Column.builder().name("duration_us").type(new ArrowType.Duration(TimeUnit.MICROSECOND)).build(), + Column.builder().name("duration_ns").type(new ArrowType.Duration(TimeUnit.NANOSECOND)).build(), + Column.builder().name("fixed_size_binary_32").type(new ArrowType.FixedSizeBinary(32)).build(), + Column.builder().name("float4").type(Types.MinorType.FLOAT4.getType()).build(), + Column.builder().name("float8").type(Types.MinorType.FLOAT8.getType()).build(), + Column.builder().name("int").type(Types.MinorType.INT.getType()).build(), + Column.builder().name("large_varbinary").type(Types.MinorType.LARGEVARBINARY.getType()).build(), + Column.builder().name("large_varchar").type(Types.MinorType.LARGEVARCHAR.getType()).build(), + Column.builder().name("small_int").type(Types.MinorType.SMALLINT.getType()).build(), + Column.builder().name("time_s").type(Types.MinorType.TIMESEC.getType()).build(), + Column.builder().name("time_ms").type(Types.MinorType.TIMEMILLI.getType()).build(), + Column.builder().name("time_us").type(Types.MinorType.TIMEMICRO.getType()).build(), + Column.builder().name("time_ns").type(Types.MinorType.TIMENANO.getType()).build(), + Column.builder().name("timestamp_s").type(Types.MinorType.TIMESTAMPSEC.getType()).build(), + Column.builder().name("timestamp_ms").type(Types.MinorType.TIMESTAMPMILLI.getType()).build(), + Column.builder().name("timestamp_us").type(Types.MinorType.TIMESTAMPMICRO.getType()).build(), + Column.builder().name("timestamp_ns").type(Types.MinorType.TIMESTAMPNANO.getType()).build(), + Column.builder().name("timestamp_s_tz").type(new ArrowType.Timestamp(TimeUnit.SECOND, ZoneOffset.UTC.getId())).build(), + Column.builder().name("timestamp_ms_tz").type(new ArrowType.Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())).build(), + Column.builder().name("timestamp_us_tz").type(new ArrowType.Timestamp(TimeUnit.MICROSECOND, ZoneOffset.UTC.getId())).build(), + Column.builder().name("timestamp_ns_tz").type(new ArrowType.Timestamp(TimeUnit.NANOSECOND, ZoneOffset.UTC.getId())).build(), + Column.builder().name("tiny_int").type(Types.MinorType.TINYINT.getType()).build(), + Column.builder().name("uint1").type(Types.MinorType.UINT1.getType()).build(), + Column.builder().name("uint2").type(Types.MinorType.UINT2.getType()).build(), + Column.builder().name("uint4").type(Types.MinorType.UINT4.getType()).build(), + Column.builder().name("uint8").type(Types.MinorType.UINT8.getType()).build(), + Column.builder().name("varbinary").type(Types.MinorType.VARBINARY.getType()).build(), + Column.builder().name("varchar").type(Types.MinorType.VARCHAR.getType()).build(), + Column.builder().name("json").type(JSONType.INSTANCE).build(), + Column.builder().name("uuid").type(UUIDType.INSTANCE).build() + ) + ).build(); @Test public void testToArrowSchema() { diff --git a/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java b/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java index e1e874b..1c77694 100644 --- a/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java +++ b/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java @@ -17,6 +17,9 @@ import java.time.ZoneOffset; import java.util.List; import java.util.Optional; +import java.util.UUID; + +import io.cloudquery.types.UUIDType; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.TimeUnit; import org.junit.jupiter.api.BeforeEach; @@ -57,6 +60,7 @@ public static final class TestClass { private byte[] byteArrayCol; private Object[] anyArrayCol; private LocalDateTime timeCol; + private UUID uuidCol; } public static final List expectedColumnsTestClass = @@ -91,10 +95,9 @@ public static final class TestClass { Column.builder().name("string_list_col").type(JSONType.INSTANCE).build(), Column.builder().name("byte_array_col").type(Binary.INSTANCE).build(), Column.builder().name("any_array_col").type(JSONType.INSTANCE).build(), - Column.builder() - .name("time_col") - .type(new Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())) - .build()); + Column.builder().name("time_col").type(new Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())).build(), + Column.builder().name("uuid_col").type(UUIDType.INSTANCE).build() + ); public static final List expectedColumnsSimpleClass = List.of( diff --git a/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java b/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java index df5704c..bf8774c 100644 --- a/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java +++ b/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java @@ -8,7 +8,10 @@ import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.Map; +import java.util.UUID; import java.util.stream.Stream; + +import io.cloudquery.types.UUIDType; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -42,6 +45,8 @@ private static class SimpleClass { private Map mapField; + private UUID uuidField; + private InnerClass innerClassObjectField; private int[] intArrayField; @@ -93,6 +98,9 @@ public static Stream testArgumentsSource() { // Map field Arguments.of("mapField", JSONType.INSTANCE), + // UUID field + Arguments.of("uuidField", UUIDType.INSTANCE), + // Inner class Arguments.of("innerClassObjectField", JSONType.INSTANCE), @@ -104,6 +112,7 @@ public static Stream testArgumentsSource() { Arguments.of("byteArrayField", ArrowType.Binary.INSTANCE), // Object array - Arguments.of("objectArrayField", JSONType.INSTANCE)); + Arguments.of("objectArrayField", JSONType.INSTANCE) + ); } } From 2c8cab8f71355ba6aca1ff5cd5e4dd351df322ea Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 12:27:54 +0200 Subject: [PATCH 5/7] fix `io.cloudquery.helper.ArrowHelperTest.testRoundTripTableEncoding` --- .../io/cloudquery/helper/ArrowHelper.java | 66 ++++++++++++------- .../io/cloudquery/helper/ArrowHelperTest.java | 66 ++++++++++++++----- 2 files changed, 91 insertions(+), 41 deletions(-) diff --git a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java index fd4f593..406077d 100644 --- a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java +++ b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java @@ -6,7 +6,9 @@ import io.cloudquery.schema.Resource; import io.cloudquery.schema.Table; import io.cloudquery.schema.Table.TableBuilder; +import io.cloudquery.types.JSONType; import io.cloudquery.types.JSONType.JSONVector; +import io.cloudquery.types.UUIDType; import io.cloudquery.types.UUIDType.UUIDVector; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -14,6 +16,7 @@ import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.arrow.vector.ipc.ArrowStreamReader; import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; @@ -189,17 +192,7 @@ public static Schema toArrowSchema(Table table) { List columns = table.getColumns(); Field[] fields = new Field[columns.size()]; for (int i = 0; i < columns.size(); i++) { - Column column = columns.get(i); - Map metadata = new HashMap<>(); - metadata.put(CQ_EXTENSION_UNIQUE, Boolean.toString(column.isUnique())); - metadata.put(CQ_EXTENSION_PRIMARY_KEY, Boolean.toString(column.isPrimaryKey())); - metadata.put(CQ_EXTENSION_INCREMENTAL, Boolean.toString(column.isIncrementalKey())); - Field field = - new Field( - column.getName(), - new FieldType(!column.isNotNull(), column.getType(), null, metadata), - null); - fields[i] = field; + fields[i] = getField(columns.get(i)); } Map metadata = new HashMap<>(); metadata.put(CQ_TABLE_NAME, table.getName()); @@ -216,23 +209,18 @@ public static Schema toArrowSchema(Table table) { return new Schema(asList(fields), metadata); } + private static Field getField(Column column) { + Map metadata = new HashMap<>(); + metadata.put(CQ_EXTENSION_UNIQUE, Boolean.toString(column.isUnique())); + metadata.put(CQ_EXTENSION_PRIMARY_KEY, Boolean.toString(column.isPrimaryKey())); + metadata.put(CQ_EXTENSION_INCREMENTAL, Boolean.toString(column.isIncrementalKey())); + return new Field(column.getName(), new FieldType(!column.isNotNull(), column.getType(), null, metadata), null); + } + public static Table fromArrowSchema(Schema schema) { List columns = new ArrayList<>(); for (Field field : schema.getFields()) { - boolean isUnique = Objects.equals(field.getMetadata().get(CQ_EXTENSION_UNIQUE), "true"); - boolean isPrimaryKey = - Objects.equals(field.getMetadata().get(CQ_EXTENSION_PRIMARY_KEY), "true"); - boolean isIncrementalKey = - Objects.equals(field.getMetadata().get(CQ_EXTENSION_INCREMENTAL), "true"); - - columns.add( - Column.builder() - .name(field.getName()) - .unique(isUnique) - .primaryKey(isPrimaryKey) - .incrementalKey(isIncrementalKey) - .type(field.getType()) - .build()); + columns.add(getColumn(field)); } Map metaData = schema.getCustomMetadata(); @@ -256,6 +244,34 @@ public static Table fromArrowSchema(Schema schema) { return tableBuilder.build(); } + private static Column getColumn(Field field) { + boolean isUnique = Objects.equals(field.getMetadata().get(CQ_EXTENSION_UNIQUE), "true"); + boolean isPrimaryKey = + Objects.equals(field.getMetadata().get(CQ_EXTENSION_PRIMARY_KEY), "true"); + boolean isIncrementalKey = + Objects.equals(field.getMetadata().get(CQ_EXTENSION_INCREMENTAL), "true"); + + ArrowType fieldType = field.getType(); + String extensionName = field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME); + String extensionMetadata = field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA); + + // We need to scan our extension types manually because of https://github.com/apache/arrow/issues/38891 + if (JSONType.EXTENSION_NAME.equals(extensionName) && JSONType.INSTANCE.serialize().equals(extensionMetadata) && JSONType.INSTANCE.storageType().equals(fieldType)) { + fieldType = JSONType.INSTANCE; + } else if (UUIDType.EXTENSION_NAME.equals(extensionName) && UUIDType.INSTANCE.serialize().equals(extensionMetadata) && UUIDType.INSTANCE.storageType().equals(fieldType)) { + fieldType = UUIDType.INSTANCE; + } + + return Column.builder() + .name(field.getName()) + .unique(isUnique) + .primaryKey(isPrimaryKey) + .incrementalKey(isIncrementalKey) + .type(fieldType) + .build(); + } + + public static ByteString encode(Resource resource) throws IOException { try (BufferAllocator bufferAllocator = new RootAllocator()) { Table table = resource.getTable(); diff --git a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java index 580d589..e1ebe2b 100644 --- a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java +++ b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java @@ -9,6 +9,7 @@ import static io.cloudquery.helper.ArrowHelper.CQ_TABLE_NAME; import static io.cloudquery.helper.ArrowHelper.CQ_TABLE_TITLE; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import com.google.protobuf.ByteString; import io.cloudquery.schema.Column; @@ -96,29 +97,57 @@ public class ArrowHelperTest { public void testToArrowSchema() { Schema arrowSchema = ArrowHelper.toArrowSchema(TEST_TABLE); - assertEquals(arrowSchema.getFields().get(0).getName(), "string_column1"); - assertEquals( - arrowSchema.getFields().get(0).getMetadata(), - Map.of( + + for (Column col : TEST_TABLE.getColumns()) { + int idx = TEST_TABLE.indexOfColumn(col.getName()); + Field field = arrowSchema.getFields().get(idx); + assertEquals(col.getName(), field.getName()); + if (idx == 0) { + assertEquals( + Map.of( CQ_EXTENSION_UNIQUE, "true", CQ_EXTENSION_INCREMENTAL, "true", CQ_EXTENSION_PRIMARY_KEY, - "true")); - assertEquals(arrowSchema.getFields().get(1).getName(), "string_column2"); - assertEquals( - arrowSchema.getFields().get(1).getMetadata(), - Map.of( + "true"), + field.getMetadata() + ); + } else if (col.getName().equals("json")) { + assertEquals( + Map.of( + CQ_EXTENSION_UNIQUE, "false", + CQ_EXTENSION_INCREMENTAL, "false", + CQ_EXTENSION_PRIMARY_KEY, "false", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, "json", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, "json-serialized" + ), + field.getMetadata() + ); + } else if (col.getName().equals("uuid")) { + assertEquals( + Map.of( + CQ_EXTENSION_UNIQUE, "false", + CQ_EXTENSION_INCREMENTAL, "false", + CQ_EXTENSION_PRIMARY_KEY, "false", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, "uuid", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, "uuid-serialized" + ), + field.getMetadata() + ); + } else { + assertEquals( + Map.of( CQ_EXTENSION_UNIQUE, "false", CQ_EXTENSION_INCREMENTAL, "false", CQ_EXTENSION_PRIMARY_KEY, - "false")); - - assertEquals(arrowSchema.getFields().get(2).getName(), "boolean_column"); - assertEquals(arrowSchema.getFields().get(3).getName(), "date_days_column"); + "false"), + field.getMetadata() + ); + } + } assertEquals( arrowSchema.getCustomMetadata(), @@ -127,7 +156,9 @@ public void testToArrowSchema() { CQ_TABLE_DESCRIPTION, "A simple test table", CQ_TABLE_TITLE, "Test table title", CQ_TABLE_DEPENDS_ON, "parent", - CQ_EXTENSION_CONSTRAINT_NAME, "")); + CQ_EXTENSION_CONSTRAINT_NAME, "" + ) + ); } @Test @@ -160,10 +191,13 @@ public void testRoundTripTableEncoding() throws IOException { assertEquals(table.getDescription(), TEST_TABLE.getDescription()); assertEquals(table.getTitle(), TEST_TABLE.getTitle()); assertEquals(table.getParent().getName(), TEST_TABLE.getParent().getName()); + assertEquals(TEST_TABLE.getColumns().size(), table.getColumns().size()); for (int i = 0; i < TEST_TABLE.getColumns().size(); i++) { - assertEquals(TEST_TABLE.getColumns().get(i).getName(), table.getColumns().get(i).getName()); - assertEquals(TEST_TABLE.getColumns().get(i).getType(), table.getColumns().get(i).getType()); + Column srcCol = TEST_TABLE.getColumns().get(i); + Column dstCol = table.getColumns().get(i); + assertEquals(srcCol.getName(), dstCol.getName()); + assertEquals(srcCol.getType(), dstCol.getType()); } } From af2dfbedf4356a92e811b4d6f8dd4a683dd16e8f Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 13:14:22 +0200 Subject: [PATCH 6/7] fix tests --- .../io/cloudquery/helper/ArrowHelper.java | 29 ++++---- .../java/io/cloudquery/scalar/DateMilli.java | 8 +++ .../java/io/cloudquery/scalar/Number.java | 5 ++ .../io/cloudquery/helper/ArrowHelperTest.java | 66 ++++++++++--------- 4 files changed, 64 insertions(+), 44 deletions(-) diff --git a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java index 406077d..38e4965 100644 --- a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java +++ b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java @@ -21,10 +21,15 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Text; +import org.joou.UByte; +import org.joou.UInteger; +import org.joou.ULong; +import org.joou.UShort; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; +import java.time.Duration; import java.util.*; import static java.util.Arrays.asList; @@ -61,16 +66,14 @@ private static void setVectorData(FieldVector vector, Object data) { dateMilliVector.set(0, (long) data); return; } - if (vector instanceof Decimal256Vector decimal256Vector) { - decimal256Vector.set(0, (java.math.BigDecimal) data); - return; - } - if (vector instanceof DecimalVector decimalVector) { - decimalVector.set(0, (java.math.BigDecimal) data); - return; - } if (vector instanceof DurationVector durationVector) { - durationVector.set(0, (long) data); + Duration duration = (Duration) data; + switch (durationVector.getUnit()) { + case SECOND -> { durationVector.set(0, duration.toSeconds());} + case MILLISECOND -> { durationVector.set(0, duration.toMillis());} + case MICROSECOND -> { durationVector.set(0, duration.toNanos() / 1000);} + case NANOSECOND -> { durationVector.set(0, duration.toNanos());} + } return; } if (vector instanceof FixedSizeBinaryVector fixedSizeBinaryVector) { @@ -126,19 +129,19 @@ private static void setVectorData(FieldVector vector, Object data) { return; } if (vector instanceof UInt1Vector uInt1Vector) { - uInt1Vector.set(0, (byte) data); + uInt1Vector.set(0, ((UByte) data).shortValue()); return; } if (vector instanceof UInt2Vector uInt2Vector) { - uInt2Vector.set(0, (short) data); + uInt2Vector.set(0, ((UShort) data).intValue()); return; } if (vector instanceof UInt4Vector uInt4Vector) { - uInt4Vector.set(0, (int) data); + uInt4Vector.set(0, ((UInteger) data).intValue()); return; } if (vector instanceof UInt8Vector uInt8Vector) { - uInt8Vector.set(0, (long) data); + uInt8Vector.set(0, ((ULong) data).longValue()); return; } if (vector instanceof VarBinaryVector varBinaryVector) { diff --git a/lib/src/main/java/io/cloudquery/scalar/DateMilli.java b/lib/src/main/java/io/cloudquery/scalar/DateMilli.java index 524d585..5dc0ee7 100644 --- a/lib/src/main/java/io/cloudquery/scalar/DateMilli.java +++ b/lib/src/main/java/io/cloudquery/scalar/DateMilli.java @@ -3,6 +3,8 @@ import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.pojo.ArrowType; +import java.time.LocalDateTime; + public class DateMilli extends Scalar { public DateMilli() { super(); @@ -34,6 +36,12 @@ public void setValue(Object value) throws ValidationException { return; } + if (value instanceof LocalDateTime localDateTime) { + // we actually store only date + this.value = localDateTime.toLocalDate().toEpochDay(); + return; + } + throw new ValidationException( ValidationException.NO_CONVERSION_AVAILABLE, this.dataType(), value); } diff --git a/lib/src/main/java/io/cloudquery/scalar/Number.java b/lib/src/main/java/io/cloudquery/scalar/Number.java index c8891cf..6c668f2 100644 --- a/lib/src/main/java/io/cloudquery/scalar/Number.java +++ b/lib/src/main/java/io/cloudquery/scalar/Number.java @@ -207,6 +207,11 @@ protected void setValue(Object value) throws ValidationException { return; } + if (value instanceof Character character) { + this.value = UShort.valueOf(character); + return; + } + throw new ValidationException( ValidationException.NO_CONVERSION_AVAILABLE, this.dataType(), value); } diff --git a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java index e1ebe2b..91c1071 100644 --- a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java +++ b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java @@ -1,31 +1,12 @@ package io.cloudquery.helper; -import static io.cloudquery.helper.ArrowHelper.CQ_EXTENSION_CONSTRAINT_NAME; -import static io.cloudquery.helper.ArrowHelper.CQ_EXTENSION_INCREMENTAL; -import static io.cloudquery.helper.ArrowHelper.CQ_EXTENSION_PRIMARY_KEY; -import static io.cloudquery.helper.ArrowHelper.CQ_EXTENSION_UNIQUE; -import static io.cloudquery.helper.ArrowHelper.CQ_TABLE_DEPENDS_ON; -import static io.cloudquery.helper.ArrowHelper.CQ_TABLE_DESCRIPTION; -import static io.cloudquery.helper.ArrowHelper.CQ_TABLE_NAME; -import static io.cloudquery.helper.ArrowHelper.CQ_TABLE_TITLE; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; - import com.google.protobuf.ByteString; import io.cloudquery.schema.Column; import io.cloudquery.schema.Resource; import io.cloudquery.schema.Table; -import java.io.IOException; -import java.time.LocalDate; -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.util.List; -import java.util.Map; -import java.util.SimpleTimeZone; -import java.util.TimeZone; - import io.cloudquery.types.JSONType; import io.cloudquery.types.UUIDType; +import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types; @@ -35,6 +16,17 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.io.IOException; +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static io.cloudquery.helper.ArrowHelper.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class ArrowHelperTest { public static final Table TEST_TABLE = @@ -56,23 +48,16 @@ public class ArrowHelperTest { Column.builder().name("bit").type(Types.MinorType.BIT.getType()).build(), Column.builder().name("date_day").type(Types.MinorType.DATEDAY.getType()).build(), Column.builder().name("date_milli").type(Types.MinorType.DATEMILLI.getType()).build(), - Column.builder().name("decimal256").type(ArrowType.Decimal.createDecimal(10,20,256)).build(), - Column.builder().name("decimal128").type(ArrowType.Decimal.createDecimal(10,20,128)).build(), Column.builder().name("duration_s").type(new ArrowType.Duration(TimeUnit.SECOND)).build(), Column.builder().name("duration_ms").type(new ArrowType.Duration(TimeUnit.MILLISECOND)).build(), Column.builder().name("duration_us").type(new ArrowType.Duration(TimeUnit.MICROSECOND)).build(), Column.builder().name("duration_ns").type(new ArrowType.Duration(TimeUnit.NANOSECOND)).build(), - Column.builder().name("fixed_size_binary_32").type(new ArrowType.FixedSizeBinary(32)).build(), Column.builder().name("float4").type(Types.MinorType.FLOAT4.getType()).build(), Column.builder().name("float8").type(Types.MinorType.FLOAT8.getType()).build(), Column.builder().name("int").type(Types.MinorType.INT.getType()).build(), Column.builder().name("large_varbinary").type(Types.MinorType.LARGEVARBINARY.getType()).build(), Column.builder().name("large_varchar").type(Types.MinorType.LARGEVARCHAR.getType()).build(), Column.builder().name("small_int").type(Types.MinorType.SMALLINT.getType()).build(), - Column.builder().name("time_s").type(Types.MinorType.TIMESEC.getType()).build(), - Column.builder().name("time_ms").type(Types.MinorType.TIMEMILLI.getType()).build(), - Column.builder().name("time_us").type(Types.MinorType.TIMEMICRO.getType()).build(), - Column.builder().name("time_ns").type(Types.MinorType.TIMENANO.getType()).build(), Column.builder().name("timestamp_s").type(Types.MinorType.TIMESTAMPSEC.getType()).build(), Column.builder().name("timestamp_ms").type(Types.MinorType.TIMESTAMPMILLI.getType()).build(), Column.builder().name("timestamp_us").type(Types.MinorType.TIMESTAMPMICRO.getType()).build(), @@ -204,10 +189,29 @@ public void testRoundTripTableEncoding() throws IOException { @Test public void testRoundTripResourceEncoding() throws Exception { Resource resource = Resource.builder().table(TEST_TABLE).build(); - resource.set("string_column1", "test_data"); - resource.set("string_column2", "test_data2"); - resource.set("date_days_column", (int) LocalDate.parse("2023-11-24").toEpochDay()); - resource.set("boolean_column", true); + resource.set("pk", "test_pk"); + resource.set("big_int", -1024L); + resource.set("date_day", (int) LocalDateTime.now().toLocalDate().toEpochDay()); + resource.set("date_milli", LocalDateTime.now().toEpochSecond(ZoneOffset.UTC) * 1000); + resource.set("duration_s", Duration.ofSeconds(1024)); + resource.set("duration_ms", Duration.ofMillis(1024)); + resource.set("duration_us", Duration.ofNanos(1024000)); + resource.set("duration_ns", Duration.ofNanos(1024)); + resource.set("float4", 5.0F); + resource.set("float8", 5.0D); + resource.set("int", -1024); + resource.set("large_varbinary", "1234"); + resource.set("large_varchar", "1234"); + resource.set("small_int", (short) -1024); + resource.set("tiny_int", (byte) -100); + resource.set("uint1", (byte) 100); + resource.set("uint2", (short) 1024); + resource.set("uint4", 1024); + resource.set("uint8", 1024L); + resource.set("varbinary", "1234"); + resource.set("varchar", "1234"); + resource.set("json", "{\"a\":1234}"); + resource.set("uuid", UUID.randomUUID()); Assertions.assertDoesNotThrow( () -> { From 66f5e53e62761256d590dfb8f0108f10cd473eca Mon Sep 17 00:00:00 2001 From: candiduslynx Date: Mon, 27 Nov 2023 13:20:58 +0200 Subject: [PATCH 7/7] ./gradlew :lib:spotlessApply --- .../io/cloudquery/helper/ArrowHelper.java | 60 ++++-- .../java/io/cloudquery/scalar/DateMilli.java | 3 +- .../io/cloudquery/helper/ArrowHelperTest.java | 193 +++++++++++------- .../transformers/TransformWithClassTest.java | 11 +- .../transformers/TypeTransformerTest.java | 6 +- 5 files changed, 168 insertions(+), 105 deletions(-) diff --git a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java index 38e4965..be21059 100644 --- a/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java +++ b/lib/src/main/java/io/cloudquery/helper/ArrowHelper.java @@ -1,5 +1,7 @@ package io.cloudquery.helper; +import static java.util.Arrays.asList; + import com.google.protobuf.ByteString; import io.cloudquery.scalar.ValidationException; import io.cloudquery.schema.Column; @@ -10,6 +12,11 @@ import io.cloudquery.types.JSONType.JSONVector; import io.cloudquery.types.UUIDType; import io.cloudquery.types.UUIDType.UUIDVector; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.channels.Channels; +import java.time.Duration; +import java.util.*; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.*; @@ -26,14 +33,6 @@ import org.joou.ULong; import org.joou.UShort; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.time.Duration; -import java.util.*; - -import static java.util.Arrays.asList; - public class ArrowHelper { public static final String CQ_EXTENSION_INCREMENTAL = "cq:extension:incremental"; public static final String CQ_EXTENSION_CONSTRAINT_NAME = "cq:extension:constraint_name"; @@ -69,10 +68,18 @@ private static void setVectorData(FieldVector vector, Object data) { if (vector instanceof DurationVector durationVector) { Duration duration = (Duration) data; switch (durationVector.getUnit()) { - case SECOND -> { durationVector.set(0, duration.toSeconds());} - case MILLISECOND -> { durationVector.set(0, duration.toMillis());} - case MICROSECOND -> { durationVector.set(0, duration.toNanos() / 1000);} - case NANOSECOND -> { durationVector.set(0, duration.toNanos());} + case SECOND -> { + durationVector.set(0, duration.toSeconds()); + } + case MILLISECOND -> { + durationVector.set(0, duration.toMillis()); + } + case MICROSECOND -> { + durationVector.set(0, duration.toNanos() / 1000); + } + case NANOSECOND -> { + durationVector.set(0, duration.toNanos()); + } } return; } @@ -217,7 +224,10 @@ private static Field getField(Column column) { metadata.put(CQ_EXTENSION_UNIQUE, Boolean.toString(column.isUnique())); metadata.put(CQ_EXTENSION_PRIMARY_KEY, Boolean.toString(column.isPrimaryKey())); metadata.put(CQ_EXTENSION_INCREMENTAL, Boolean.toString(column.isIncrementalKey())); - return new Field(column.getName(), new FieldType(!column.isNotNull(), column.getType(), null, metadata), null); + return new Field( + column.getName(), + new FieldType(!column.isNotNull(), column.getType(), null, metadata), + null); } public static Table fromArrowSchema(Schema schema) { @@ -250,22 +260,29 @@ public static Table fromArrowSchema(Schema schema) { private static Column getColumn(Field field) { boolean isUnique = Objects.equals(field.getMetadata().get(CQ_EXTENSION_UNIQUE), "true"); boolean isPrimaryKey = - Objects.equals(field.getMetadata().get(CQ_EXTENSION_PRIMARY_KEY), "true"); + Objects.equals(field.getMetadata().get(CQ_EXTENSION_PRIMARY_KEY), "true"); boolean isIncrementalKey = - Objects.equals(field.getMetadata().get(CQ_EXTENSION_INCREMENTAL), "true"); + Objects.equals(field.getMetadata().get(CQ_EXTENSION_INCREMENTAL), "true"); ArrowType fieldType = field.getType(); - String extensionName = field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME); - String extensionMetadata = field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA); + String extensionName = + field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME); + String extensionMetadata = + field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA); - // We need to scan our extension types manually because of https://github.com/apache/arrow/issues/38891 - if (JSONType.EXTENSION_NAME.equals(extensionName) && JSONType.INSTANCE.serialize().equals(extensionMetadata) && JSONType.INSTANCE.storageType().equals(fieldType)) { + // We need to scan our extension types manually because of + // https://github.com/apache/arrow/issues/38891 + if (JSONType.EXTENSION_NAME.equals(extensionName) + && JSONType.INSTANCE.serialize().equals(extensionMetadata) + && JSONType.INSTANCE.storageType().equals(fieldType)) { fieldType = JSONType.INSTANCE; - } else if (UUIDType.EXTENSION_NAME.equals(extensionName) && UUIDType.INSTANCE.serialize().equals(extensionMetadata) && UUIDType.INSTANCE.storageType().equals(fieldType)) { + } else if (UUIDType.EXTENSION_NAME.equals(extensionName) + && UUIDType.INSTANCE.serialize().equals(extensionMetadata) + && UUIDType.INSTANCE.storageType().equals(fieldType)) { fieldType = UUIDType.INSTANCE; } - return Column.builder() + return Column.builder() .name(field.getName()) .unique(isUnique) .primaryKey(isPrimaryKey) @@ -274,7 +291,6 @@ private static Column getColumn(Field field) { .build(); } - public static ByteString encode(Resource resource) throws IOException { try (BufferAllocator bufferAllocator = new RootAllocator()) { Table table = resource.getTable(); diff --git a/lib/src/main/java/io/cloudquery/scalar/DateMilli.java b/lib/src/main/java/io/cloudquery/scalar/DateMilli.java index 5dc0ee7..97aec0f 100644 --- a/lib/src/main/java/io/cloudquery/scalar/DateMilli.java +++ b/lib/src/main/java/io/cloudquery/scalar/DateMilli.java @@ -1,10 +1,9 @@ package io.cloudquery.scalar; +import java.time.LocalDateTime; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.pojo.ArrowType; -import java.time.LocalDateTime; - public class DateMilli extends Scalar { public DateMilli() { super(); diff --git a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java index 91c1071..5c587e4 100644 --- a/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java +++ b/lib/src/test/java/io/cloudquery/helper/ArrowHelperTest.java @@ -1,12 +1,21 @@ package io.cloudquery.helper; +import static io.cloudquery.helper.ArrowHelper.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + import com.google.protobuf.ByteString; import io.cloudquery.schema.Column; import io.cloudquery.schema.Resource; import io.cloudquery.schema.Table; import io.cloudquery.types.JSONType; import io.cloudquery.types.UUIDType; -import org.apache.arrow.vector.DateDayVector; +import java.io.IOException; +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.List; +import java.util.Map; +import java.util.UUID; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types; @@ -16,17 +25,6 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.ZoneOffset; -import java.util.List; -import java.util.Map; -import java.util.UUID; - -import static io.cloudquery.helper.ArrowHelper.*; -import static org.junit.jupiter.api.Assertions.assertEquals; - public class ArrowHelperTest { public static final Table TEST_TABLE = @@ -47,90 +45,143 @@ public class ArrowHelperTest { Column.builder().name("big_int").type(Types.MinorType.BIGINT.getType()).build(), Column.builder().name("bit").type(Types.MinorType.BIT.getType()).build(), Column.builder().name("date_day").type(Types.MinorType.DATEDAY.getType()).build(), - Column.builder().name("date_milli").type(Types.MinorType.DATEMILLI.getType()).build(), - Column.builder().name("duration_s").type(new ArrowType.Duration(TimeUnit.SECOND)).build(), - Column.builder().name("duration_ms").type(new ArrowType.Duration(TimeUnit.MILLISECOND)).build(), - Column.builder().name("duration_us").type(new ArrowType.Duration(TimeUnit.MICROSECOND)).build(), - Column.builder().name("duration_ns").type(new ArrowType.Duration(TimeUnit.NANOSECOND)).build(), + Column.builder() + .name("date_milli") + .type(Types.MinorType.DATEMILLI.getType()) + .build(), + Column.builder() + .name("duration_s") + .type(new ArrowType.Duration(TimeUnit.SECOND)) + .build(), + Column.builder() + .name("duration_ms") + .type(new ArrowType.Duration(TimeUnit.MILLISECOND)) + .build(), + Column.builder() + .name("duration_us") + .type(new ArrowType.Duration(TimeUnit.MICROSECOND)) + .build(), + Column.builder() + .name("duration_ns") + .type(new ArrowType.Duration(TimeUnit.NANOSECOND)) + .build(), Column.builder().name("float4").type(Types.MinorType.FLOAT4.getType()).build(), Column.builder().name("float8").type(Types.MinorType.FLOAT8.getType()).build(), Column.builder().name("int").type(Types.MinorType.INT.getType()).build(), - Column.builder().name("large_varbinary").type(Types.MinorType.LARGEVARBINARY.getType()).build(), - Column.builder().name("large_varchar").type(Types.MinorType.LARGEVARCHAR.getType()).build(), - Column.builder().name("small_int").type(Types.MinorType.SMALLINT.getType()).build(), - Column.builder().name("timestamp_s").type(Types.MinorType.TIMESTAMPSEC.getType()).build(), - Column.builder().name("timestamp_ms").type(Types.MinorType.TIMESTAMPMILLI.getType()).build(), - Column.builder().name("timestamp_us").type(Types.MinorType.TIMESTAMPMICRO.getType()).build(), - Column.builder().name("timestamp_ns").type(Types.MinorType.TIMESTAMPNANO.getType()).build(), - Column.builder().name("timestamp_s_tz").type(new ArrowType.Timestamp(TimeUnit.SECOND, ZoneOffset.UTC.getId())).build(), - Column.builder().name("timestamp_ms_tz").type(new ArrowType.Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())).build(), - Column.builder().name("timestamp_us_tz").type(new ArrowType.Timestamp(TimeUnit.MICROSECOND, ZoneOffset.UTC.getId())).build(), - Column.builder().name("timestamp_ns_tz").type(new ArrowType.Timestamp(TimeUnit.NANOSECOND, ZoneOffset.UTC.getId())).build(), + Column.builder() + .name("large_varbinary") + .type(Types.MinorType.LARGEVARBINARY.getType()) + .build(), + Column.builder() + .name("large_varchar") + .type(Types.MinorType.LARGEVARCHAR.getType()) + .build(), + Column.builder() + .name("small_int") + .type(Types.MinorType.SMALLINT.getType()) + .build(), + Column.builder() + .name("timestamp_s") + .type(Types.MinorType.TIMESTAMPSEC.getType()) + .build(), + Column.builder() + .name("timestamp_ms") + .type(Types.MinorType.TIMESTAMPMILLI.getType()) + .build(), + Column.builder() + .name("timestamp_us") + .type(Types.MinorType.TIMESTAMPMICRO.getType()) + .build(), + Column.builder() + .name("timestamp_ns") + .type(Types.MinorType.TIMESTAMPNANO.getType()) + .build(), + Column.builder() + .name("timestamp_s_tz") + .type(new ArrowType.Timestamp(TimeUnit.SECOND, ZoneOffset.UTC.getId())) + .build(), + Column.builder() + .name("timestamp_ms_tz") + .type(new ArrowType.Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())) + .build(), + Column.builder() + .name("timestamp_us_tz") + .type(new ArrowType.Timestamp(TimeUnit.MICROSECOND, ZoneOffset.UTC.getId())) + .build(), + Column.builder() + .name("timestamp_ns_tz") + .type(new ArrowType.Timestamp(TimeUnit.NANOSECOND, ZoneOffset.UTC.getId())) + .build(), Column.builder().name("tiny_int").type(Types.MinorType.TINYINT.getType()).build(), Column.builder().name("uint1").type(Types.MinorType.UINT1.getType()).build(), Column.builder().name("uint2").type(Types.MinorType.UINT2.getType()).build(), Column.builder().name("uint4").type(Types.MinorType.UINT4.getType()).build(), Column.builder().name("uint8").type(Types.MinorType.UINT8.getType()).build(), - Column.builder().name("varbinary").type(Types.MinorType.VARBINARY.getType()).build(), + Column.builder() + .name("varbinary") + .type(Types.MinorType.VARBINARY.getType()) + .build(), Column.builder().name("varchar").type(Types.MinorType.VARCHAR.getType()).build(), Column.builder().name("json").type(JSONType.INSTANCE).build(), - Column.builder().name("uuid").type(UUIDType.INSTANCE).build() - ) - ).build(); + Column.builder().name("uuid").type(UUIDType.INSTANCE).build())) + .build(); @Test public void testToArrowSchema() { Schema arrowSchema = ArrowHelper.toArrowSchema(TEST_TABLE); - for (Column col : TEST_TABLE.getColumns()) { int idx = TEST_TABLE.indexOfColumn(col.getName()); Field field = arrowSchema.getFields().get(idx); assertEquals(col.getName(), field.getName()); if (idx == 0) { assertEquals( - Map.of( - CQ_EXTENSION_UNIQUE, - "true", - CQ_EXTENSION_INCREMENTAL, - "true", - CQ_EXTENSION_PRIMARY_KEY, - "true"), - field.getMetadata() - ); + Map.of( + CQ_EXTENSION_UNIQUE, + "true", + CQ_EXTENSION_INCREMENTAL, + "true", + CQ_EXTENSION_PRIMARY_KEY, + "true"), + field.getMetadata()); } else if (col.getName().equals("json")) { assertEquals( - Map.of( - CQ_EXTENSION_UNIQUE, "false", - CQ_EXTENSION_INCREMENTAL, "false", - CQ_EXTENSION_PRIMARY_KEY, "false", - ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, "json", - ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, "json-serialized" - ), - field.getMetadata() - ); + Map.of( + CQ_EXTENSION_UNIQUE, + "false", + CQ_EXTENSION_INCREMENTAL, + "false", + CQ_EXTENSION_PRIMARY_KEY, + "false", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, + "json", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, + "json-serialized"), + field.getMetadata()); } else if (col.getName().equals("uuid")) { assertEquals( - Map.of( - CQ_EXTENSION_UNIQUE, "false", - CQ_EXTENSION_INCREMENTAL, "false", - CQ_EXTENSION_PRIMARY_KEY, "false", - ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, "uuid", - ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, "uuid-serialized" - ), - field.getMetadata() - ); + Map.of( + CQ_EXTENSION_UNIQUE, + "false", + CQ_EXTENSION_INCREMENTAL, + "false", + CQ_EXTENSION_PRIMARY_KEY, + "false", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, + "uuid", + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, + "uuid-serialized"), + field.getMetadata()); } else { assertEquals( - Map.of( - CQ_EXTENSION_UNIQUE, - "false", - CQ_EXTENSION_INCREMENTAL, - "false", - CQ_EXTENSION_PRIMARY_KEY, - "false"), - field.getMetadata() - ); + Map.of( + CQ_EXTENSION_UNIQUE, + "false", + CQ_EXTENSION_INCREMENTAL, + "false", + CQ_EXTENSION_PRIMARY_KEY, + "false"), + field.getMetadata()); } } @@ -141,9 +192,7 @@ public void testToArrowSchema() { CQ_TABLE_DESCRIPTION, "A simple test table", CQ_TABLE_TITLE, "Test table title", CQ_TABLE_DEPENDS_ON, "parent", - CQ_EXTENSION_CONSTRAINT_NAME, "" - ) - ); + CQ_EXTENSION_CONSTRAINT_NAME, "")); } @Test diff --git a/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java b/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java index 1c77694..f857bf5 100644 --- a/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java +++ b/lib/src/test/java/io/cloudquery/transformers/TransformWithClassTest.java @@ -13,13 +13,12 @@ import io.cloudquery.schema.Column; import io.cloudquery.schema.Table; import io.cloudquery.types.JSONType; +import io.cloudquery.types.UUIDType; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.List; import java.util.Optional; import java.util.UUID; - -import io.cloudquery.types.UUIDType; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.TimeUnit; import org.junit.jupiter.api.BeforeEach; @@ -95,9 +94,11 @@ public static final class TestClass { Column.builder().name("string_list_col").type(JSONType.INSTANCE).build(), Column.builder().name("byte_array_col").type(Binary.INSTANCE).build(), Column.builder().name("any_array_col").type(JSONType.INSTANCE).build(), - Column.builder().name("time_col").type(new Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())).build(), - Column.builder().name("uuid_col").type(UUIDType.INSTANCE).build() - ); + Column.builder() + .name("time_col") + .type(new Timestamp(TimeUnit.MILLISECOND, ZoneOffset.UTC.getId())) + .build(), + Column.builder().name("uuid_col").type(UUIDType.INSTANCE).build()); public static final List expectedColumnsSimpleClass = List.of( diff --git a/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java b/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java index bf8774c..882928e 100644 --- a/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java +++ b/lib/src/test/java/io/cloudquery/transformers/TypeTransformerTest.java @@ -4,14 +4,13 @@ import io.cloudquery.transformers.TypeTransformer.DefaultTypeTransformer; import io.cloudquery.types.JSONType; +import io.cloudquery.types.UUIDType; import java.net.InetAddress; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.Map; import java.util.UUID; import java.util.stream.Stream; - -import io.cloudquery.types.UUIDType; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -112,7 +111,6 @@ public static Stream testArgumentsSource() { Arguments.of("byteArrayField", ArrowType.Binary.INSTANCE), // Object array - Arguments.of("objectArrayField", JSONType.INSTANCE) - ); + Arguments.of("objectArrayField", JSONType.INSTANCE)); } }