From b67c7b03e2f02fca601ad8702428742d0442a4e4 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Wed, 11 Apr 2018 12:04:09 -0700 Subject: [PATCH] Cast values that don't match any rule to string (#22) * Force cast to string if a given value doesn't match any rule * Stop parsing string values as integer or double * Add testConvertToDatastoreValue * Modify README * Change default time zone for timestamp --- README.md | 6 +- pom.xml | 2 +- .../github/yuiskw/beam/TableRow2EntityFn.java | 54 ++++++++++++----- .../yuiskw/beam/TableRow2EntityFnTest.java | 59 +++++++++++++++++++ 4 files changed, 101 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index d2e9db3..7d720d7 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Apache Beam's `DatastoreIO` doesn't allow us to write same key at once. mvn clean package # Run bigquery-to-datastore via the compiled JAR file -java -cp $(pwd)/target/bigquery-to-datastore-bundled-0.5.0.jar \ +java -cp $(pwd)/target/bigquery-to-datastore-bundled-0.5.1.jar \ com.github.yuiskw.beam.BigQuery2Datastore \ --project=your-gcp-project \ --runner=DataflowRunner \ @@ -71,7 +71,7 @@ mvn clean package make package # run -java -cp $(pwd)/target/bigquery-to-datastore-bundled-0.5.0.jar --help +java -cp $(pwd)/target/bigquery-to-datastore-bundled-0.5.1.jar --help # or ./bin/bigquery-to-datastore --help ``` @@ -80,7 +80,7 @@ java -cp $(pwd)/target/bigquery-to-datastore-bundled-0.5.0.jar --help We also offers docker images for this project in [yuiskw/bigquery\-to\-datastore \- Docker Hub](https://hub.docker.com/r/yuiskw/bigquery-to-datastore/). We have several docker images based on Apache Beam versions. 
``` -docker run yuiskw/bigquery-to-datastore:0.5.0-beam-2.1 --help +docker run yuiskw/bigquery-to-datastore:0.5.1-beam-2.1 --help ``` ### How to install it with homebrew diff --git a/pom.xml b/pom.xml index fc8212b..9534c6a 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ com.github.yuiskw bigquery-to-datastore - 0.5.0 + 0.5.1 jar diff --git a/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java b/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java index 918131f..8d37f3f 100644 --- a/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java +++ b/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java @@ -109,11 +109,11 @@ else if (value instanceof java.lang.Integer) { v = Value.newBuilder().setIntegerValue(((Integer) value).intValue()) .setExcludeFromIndexes(isExcluded).build(); } - else if (value instanceof String && parseInteger((String) value) != null) { - Integer integer = parseInteger((String) value); - v = Value.newBuilder().setIntegerValue(integer.intValue()) - .setExcludeFromIndexes(isExcluded).build(); - } + // else if (value instanceof String && parseInteger((String) value) != null) { + // Integer integer = parseInteger((String) value); + // v = Value.newBuilder().setIntegerValue(integer.intValue()) + // .setExcludeFromIndexes(isExcluded).build(); + // } // LONG else if (value instanceof java.lang.Long) { v = Value.newBuilder().setIntegerValue((int) ((Long) value).longValue()) @@ -124,6 +124,10 @@ else if (value instanceof java.lang.Double) { v = Value.newBuilder().setDoubleValue(((Double) value).doubleValue()) .setExcludeFromIndexes(isExcluded).build(); } + // else if (value instanceof String && parseDouble((String) value) != null) { + // v = Value.newBuilder().setDoubleValue(((Double) value).doubleValue()) + // .setExcludeFromIndexes(isExcluded).build(); + // } // TIMESTAMP else if (value instanceof org.joda.time.LocalDateTime) { Timestamp timestamp = toTimestamp(((LocalDateTime) value).toLocalDate().toDate()); @@ -147,11 
+151,6 @@ else if (value instanceof org.joda.time.LocalDate) { v = Value.newBuilder().setTimestampValue(timestamp) .setExcludeFromIndexes(isExcluded).build(); } - // STRING - else if (value instanceof String) { - v = Value.newBuilder().setStringValue((String) value) - .setExcludeFromIndexes(isExcluded).build(); - } // RECORD else if (value instanceof List) { ArrayValue.Builder arrayValueBuilder = ArrayValue.newBuilder(); @@ -176,6 +175,12 @@ else if (value instanceof Map) { } v = Value.newBuilder().setEntityValue(subEntityBuilder.build()).build(); } + // String + // If a given value does not match any of the above rules, treat it as a string. + else { + v = Value.newBuilder().setStringValue(value.toString()) + .setExcludeFromIndexes(isExcluded).build(); + } return v; } @@ -240,17 +245,34 @@ public Key getKey(String name) { * Parse integer value * * @param value String - * @return parsed integer of null if given value is not integer + * @return parsed integer or null if given value is not integer */ public static Integer parseInteger(String value) { - Integer integer = null; + Integer parsed = null; + try { + parsed = Integer.valueOf(value); + } catch (NumberFormatException e) { + // Do nothing. + ; + } + return parsed; + } + + /** + * Parse double value + * + * @param value String + * @return parsed double or null if given value is not double + */ + public static Double parseDouble(String value) { + Double parsed = null; try { - integer = Integer.valueOf(value); + parsed = Double.valueOf(value); } catch (NumberFormatException e) { // Do nothing. 
; } - return integer; + return parsed; } /** @@ -264,7 +286,7 @@ public static Instant parseDate(String value) { DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-M-d") .withResolverStyle(ResolverStyle.SMART); java.time.LocalDate localDate = java.time.LocalDate.parse(value, formatter); - instant = localDate.atStartOfDay().atZone(ZoneId.systemDefault()).toInstant(); + instant = localDate.atStartOfDay().atZone(ZoneId.of("UTC")).toInstant(); } catch (DateTimeParseException e) { // Do nothing. ; @@ -295,7 +317,7 @@ public static Instant parseTimestamp(String value) { DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern) .withResolverStyle(ResolverStyle.SMART); java.time.LocalDateTime localDateTime = java.time.LocalDateTime.parse(value, formatter); - instant = localDateTime.atZone(ZoneId.systemDefault()).toInstant(); + instant = localDateTime.atZone(ZoneId.of("UTC")).toInstant(); return instant; } catch (DateTimeParseException e) { // Do nothing. diff --git a/src/test/java/com/github/yuiskw/beam/TableRow2EntityFnTest.java b/src/test/java/com/github/yuiskw/beam/TableRow2EntityFnTest.java index dc8dfeb..f3665cc 100644 --- a/src/test/java/com/github/yuiskw/beam/TableRow2EntityFnTest.java +++ b/src/test/java/com/github/yuiskw/beam/TableRow2EntityFnTest.java @@ -91,6 +91,65 @@ public void testConvert2() { } } + @Test + public void testConvertToDatastoreValue() { + TableRow2EntityFn fn =new TableRow2EntityFn( + projectId, namespace, null, kind, keyColumn, indexedColumns); + + Value v = null; + + // String + v = fn.convertToDatastoreValue("value", "hello, world"); + assertEquals("hello, world", v.getStringValue()); + + // Integer + v = fn.convertToDatastoreValue("value", 123); + assertEquals(123, v.getIntegerValue()); + + // Double + v = fn.convertToDatastoreValue("value", 123.456); + assertEquals(123.456, v.getDoubleValue(), 1e-3); + + // Timestamp + v = fn.convertToDatastoreValue("value", "2018-01-01 01:23:45"); + assertEquals(1514769825, 
v.getTimestampValue().getSeconds()); + + // Date + v = fn.convertToDatastoreValue("value", "2018-01-01"); + assertEquals(1514764800, v.getTimestampValue().getSeconds()); + + // Array + v = fn.convertToDatastoreValue("value", Arrays.asList(1, 2, 3)); + assertEquals(3, v.getArrayValue().getValuesCount()); + + // Struct + Map subMap = new HashMap(); + subMap.put("int", 123); + subMap.put("array", Arrays.asList(1, 2, 3)); + + Map map = new HashMap(); + map.put("int", 123); + map.put("double", 123.456); + map.put("string", "hello, world"); + map.put("timestamp", "2018-01-01 01:23:45"); + map.put("date", "2018-01-01"); + map.put("array", Arrays.asList(1, 2, 3)); + map.put("struct", subMap); + + v = fn.convertToDatastoreValue("value", map); + Entity entity = v.getEntityValue(); + assertEquals(123, entity.getPropertiesOrThrow("int").getIntegerValue()); + assertEquals(123.456, entity.getPropertiesOrThrow("double").getDoubleValue(), 1e-3); + assertEquals("hello, world", entity.getPropertiesOrThrow("string").getStringValue()); + assertEquals(1514769825, entity.getPropertiesOrThrow("timestamp").getTimestampValue().getSeconds()); + assertEquals(1514764800, entity.getPropertiesOrThrow("date").getTimestampValue().getSeconds()); + assertEquals(3, entity.getPropertiesOrThrow("array").getArrayValue().getValuesCount()); + + Entity subEntity = entity.getPropertiesOrThrow("struct").getEntityValue(); + assertEquals(123, subEntity.getPropertiesOrThrow("int").getIntegerValue()); + assertEquals(3, subEntity.getPropertiesOrThrow("array").getArrayValue().getValuesCount()); + } + @Test public void testIsDate() { assertNotNull(TableRow2EntityFn.parseDate("2017-01-01"));