Skip to content

Commit

Permalink
Merge pull request #233 from tigrisdata/main
Browse files Browse the repository at this point in the history
Beta release
  • Loading branch information
efirs authored May 1, 2023
2 parents 779e474 + fe5943d commit 4bd8491
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 58 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ifeq ($(GOOS), windows)
BIN=tigris.exe
endif

BUILD_PARAM=-tags=release -ldflags "-w -extldflags '-static' -X 'github.com/tigrisdata/tigris-cli/util.Version=$(VERSION)'" -o ${BIN} $(shell printenv BUILD_PARAM)
BUILD_PARAM=-tags=release -ldflags "-s -w -extldflags '-static' -X 'github.com/tigrisdata/tigris-cli/util.Version=$(VERSION)'" -o ${BIN} $(shell printenv BUILD_PARAM)
TEST_PARAM=-cover -race -tags=test $(shell printenv TEST_PARAM)

all: ${BIN}
Expand Down
9 changes: 9 additions & 0 deletions cmd/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,15 @@ func init() {
importCmd.Flags().StringVar(&CSVComment, "csv-comment", "",
"CSV comment")

importCmd.Flags().BoolVar(&schema.DetectByteArrays, "detect-byte-arrays", false,
"Try detect byte arrays fields")
importCmd.Flags().BoolVar(&schema.DetectUUIDs, "detect-uuids", true,
"Try detect UUID fields")
importCmd.Flags().BoolVar(&schema.DetectTimes, "detect-times", true,
"Try detect date time fields")
importCmd.Flags().BoolVar(&schema.DetectIntegers, "detect-integers", true,
"Try detect integer fields")

addProjectFlag(importCmd)
rootCmd.AddCommand(importCmd)
}
9 changes: 9 additions & 0 deletions cmd/search/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,15 @@ func init() {
importCmd.Flags().BoolVar(&NoCreate, "no-create-index", false,
"Do not create collection automatically if it doesn't exist")

importCmd.Flags().BoolVar(&schema.DetectByteArrays, "detect-byte-arrays", false,
"Try to detect byte arrays fields")
importCmd.Flags().BoolVar(&schema.DetectUUIDs, "detect-uuids", true,
"Try to detect UUID fields")
importCmd.Flags().BoolVar(&schema.DetectTimes, "detect-times", true,
"Try to detect date time fields")
importCmd.Flags().BoolVar(&schema.DetectIntegers, "detect-integers", true,
"Try to detect integer fields")

importCmd.Flags().StringVar(&CSVDelimiter, "csv-delimiter", "",
"CSV delimiter")
importCmd.Flags().BoolVar(&CSVTrimLeadingSpace, "csv-trim-leading-space", true,
Expand Down
99 changes: 56 additions & 43 deletions schema/inference.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ const (
)

var (
DetectByteArrays = false
DetectUUIDs = true
DetectTimes = true
DetectIntegers = true

ErrIncompatibleSchema = fmt.Errorf("error incompatible schema")
ErrExpectedString = fmt.Errorf("expected string type")
ErrExpectedNumber = fmt.Errorf("expected json.Number")
Expand All @@ -61,43 +66,49 @@ func parseDateTime(s string) bool {
return false
}

func translateStringType(v interface{}) (string, string, error) {
t := reflect.TypeOf(v)
func parseNumber(v any) (string, string, error) {
n, ok := v.(json.Number)
if !ok {
return "", "", ErrExpectedNumber
}

if t.PkgPath() == "encoding/json" && t.Name() == "Number" {
n, ok := v.(json.Number)
if !ok {
return "", "", ErrExpectedNumber
if _, err := n.Int64(); err != nil || !DetectIntegers {
_, err = n.Float64()
if err != nil {
return "", "", err
}

if _, err := n.Int64(); err != nil {
_, err = n.Float64()
if err != nil {
return "", "", err
}
return typeNumber, "", nil
}

return typeNumber, "", nil
}
return typeInteger, "", nil
}

func translateStringType(v interface{}) (string, string, error) {
t := reflect.TypeOf(v)

return typeInteger, "", nil
if t.PkgPath() == "encoding/json" && t.Name() == "Number" {
return parseNumber(v)
}

s, ok := v.(string)
if !ok {
return "", "", ErrExpectedString
}

if parseDateTime(s) {
if parseDateTime(s) && DetectTimes {
return typeString, formatDateTime, nil
}

if _, err := uuid.Parse(s); err == nil {
if _, err := uuid.Parse(s); err == nil && DetectUUIDs {
return typeString, formatUUID, nil
}

b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
if _, err := base64.StdEncoding.Decode(b, []byte(s)); err == nil {
return typeString, formatByte, nil
if len(s) != 0 && DetectByteArrays {
b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
if _, err := base64.StdEncoding.Decode(b, []byte(s)); err == nil {
return typeString, formatByte, nil
}
}

return typeString, "", nil
Expand All @@ -123,7 +134,8 @@ func translateType(v interface{}) (string, string, error) {
}
}

func extendedStringType(oldType string, oldFormat string, newType string, newFormat string) (string, string, error) {
func extendedStringType(name string, oldType string, oldFormat string, newType string, newFormat string,
) (string, string, error) {
if newFormat == "" {
switch {
case oldFormat == formatByte:
Expand All @@ -137,7 +149,7 @@ func extendedStringType(oldType string, oldFormat string, newType string, newFor
return oldType, oldFormat, nil
}

return "", "", ErrIncompatibleSchema
return "", "", fmt.Errorf("%w field: %s", ErrIncompatibleSchema, name)
}

// this only matters for initial schema inference, where we have the luxury to extend the type
Expand All @@ -147,7 +159,8 @@ func extendedStringType(oldType string, oldFormat string, newType string, newFor
// byte -> string
// time -> string
// uuid -> string.
func extendedType(oldType string, oldFormat string, newType string, newFormat string) (string, string, error) {
func extendedType(name string, oldType string, oldFormat string, newType string, newFormat string,
) (string, string, error) {
if oldType == typeInteger && newType == typeNumber {
return newType, newFormat, nil
}
Expand All @@ -157,7 +170,7 @@ func extendedType(oldType string, oldFormat string, newType string, newFormat st
}

if oldType == typeString && newType == typeString {
if t, f, err := extendedStringType(oldType, oldFormat, newType, newFormat); err == nil {
if t, f, err := extendedStringType(name, oldType, oldFormat, newType, newFormat); err == nil {
return t, f, nil
}
}
Expand All @@ -166,13 +179,13 @@ func extendedType(oldType string, oldFormat string, newType string, newFormat st
return newType, newFormat, nil
}

log.Debug().Str("oldType", oldType).Str("newType", newType).Msg("incompatible schema")
log.Debug().Str("oldFormat", oldFormat).Str("newFormat", newFormat).Msg("incompatible schema")
log.Debug().Str("oldType", oldType).Str("newType", newType).Str("field_name", name).Msg("incompatible schema")
log.Debug().Str("oldFormat", oldFormat).Str("newFormat", newFormat).Str("field_name", name).Msg("incompatible schema")

return "", "", ErrIncompatibleSchema
return "", "", fmt.Errorf("%w field: %s", ErrIncompatibleSchema, name)
}

func traverseObject(existingField *schema.Field, newField *schema.Field, values map[string]any) error {
func traverseObject(name string, existingField *schema.Field, newField *schema.Field, values map[string]any) error {
switch {
case existingField == nil:
newField.Fields = make(map[string]*schema.Field)
Expand All @@ -186,13 +199,13 @@ func traverseObject(existingField *schema.Field, newField *schema.Field, values
log.Debug().Str("oldType", existingField.Type).Str("newType", newField.Type).Interface("values", values).
Msg("object converted to primitive")

return ErrIncompatibleSchema
return fmt.Errorf("%w field: %s", ErrIncompatibleSchema, name)
}

return traverseFields(newField.Fields, values, nil)
}

func traverseArray(existingField *schema.Field, newField *schema.Field, v any) error {
func traverseArray(name string, existingField *schema.Field, newField *schema.Field, v any) error {
for i := 0; i < reflect.ValueOf(v).Len(); i++ {
t, format, err := translateType(reflect.ValueOf(v).Index(i).Interface())
if err != nil {
Expand All @@ -209,11 +222,11 @@ func traverseArray(existingField *schema.Field, newField *schema.Field, v any) e
log.Debug().Str("oldType", existingField.Type).Str("newType", newField.Type).Interface("values", v).
Msg("object converted to primitive")

return ErrIncompatibleSchema
return fmt.Errorf("%w field: %s", ErrIncompatibleSchema, name)
}
}

nt, nf, err := extendedType(newField.Items.Type, newField.Items.Format, t, format)
nt, nf, err := extendedType(name, newField.Items.Type, newField.Items.Format, t, format)
if err != nil {
return err
}
Expand All @@ -223,7 +236,7 @@ func traverseArray(existingField *schema.Field, newField *schema.Field, v any) e

if t == typeObject {
values, _ := reflect.ValueOf(v).Index(i).Interface().(map[string]any)
if err = traverseObject(newField.Items, newField.Items, values); err != nil {
if err = traverseObject(name, newField.Items, newField.Items, values); err != nil {
return err
}

Expand All @@ -247,12 +260,12 @@ func setAutoGenerate(autoGen []string, name string, field *schema.Field) {
}
}

func traverseFieldsLow(t string, format string, k string, f *schema.Field, v any, sch map[string]*schema.Field,
func traverseFieldsLow(t string, format string, name string, f *schema.Field, v any, sch map[string]*schema.Field,
) (bool, error) {
switch {
case t == typeObject:
vm, _ := v.(map[string]any)
if err := traverseObject(sch[k], f, vm); err != nil {
if err := traverseObject(name, sch[name], f, vm); err != nil {
return false, err
}

Expand All @@ -265,15 +278,15 @@ func traverseFieldsLow(t string, format string, k string, f *schema.Field, v any
return true, nil // empty array does not reflect in the schema
}

if err := traverseArray(sch[k], f, v); err != nil {
if err := traverseArray(name, sch[name], f, v); err != nil {
return false, err
}

if f.Items == nil {
return true, nil // empty object
}
case sch[k] != nil:
nt, nf, err := extendedType(sch[k].Type, sch[k].Format, t, format)
case sch[name] != nil:
nt, nf, err := extendedType(name, sch[name].Type, sch[name].Format, t, format)
if err != nil {
return false, err
}
Expand All @@ -286,20 +299,20 @@ func traverseFieldsLow(t string, format string, k string, f *schema.Field, v any
}

func traverseFields(sch map[string]*schema.Field, fields map[string]any, autoGen []string) error {
for k, v := range fields {
for name, val := range fields {
// handle `null` JSON value
if v == nil {
if val == nil {
continue
}

t, format, err := translateType(v)
t, format, err := translateType(val)
if err != nil {
return err
}

f := &schema.Field{Type: t, Format: format}

skip, err := traverseFieldsLow(t, format, k, f, v, sch)
skip, err := traverseFieldsLow(t, format, name, f, val, sch)
if err != nil {
return err
}
Expand All @@ -308,9 +321,9 @@ func traverseFields(sch map[string]*schema.Field, fields map[string]any, autoGen
continue
}

setAutoGenerate(autoGen, k, f)
setAutoGenerate(autoGen, name, f)

sch[k] = f
sch[name] = f
}

return nil
Expand Down
23 changes: 13 additions & 10 deletions schema/inference_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package schema

import (
"encoding/json"
"fmt"
"testing"
"unsafe"

Expand Down Expand Up @@ -295,6 +296,8 @@ func TestSchemaInference(t *testing.T) {
},
}

DetectByteArrays = true

for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
var sch schema.Schema
Expand All @@ -318,77 +321,77 @@ func TestSchemaInferenceNegative(t *testing.T) {
[]byte(`{ "incompatible_field" : 1 }`),
[]byte(`{ "incompatible_field" : "1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1" }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_prim_to_object",
[][]byte{
[]byte(`{ "incompatible_field" : 1 }`),
[]byte(`{ "incompatible_field" : { "field1": "1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1" } }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_object_to_prim",
[][]byte{
[]byte(`{ "incompatible_field" : { "field1": "1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1" } }`),
[]byte(`{ "incompatible_field" : 1 }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_array_to_prim",
[][]byte{
[]byte(`{ "incompatible_field" : ["1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1"] }`),
[]byte(`{ "incompatible_field" : 1 }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_prim_to_array",
[][]byte{
[]byte(`{ "incompatible_field" : 1 }`),
[]byte(`{ "incompatible_field" : ["1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1"] }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_array_mixed",
[][]byte{
[]byte(`{ "incompatible_field" : ["1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1", 1] }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_array",
[][]byte{
[]byte(`{ "incompatible_field" : [ 1 ] }`),
[]byte(`{ "incompatible_field" : ["1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1"] }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "incompatible_field"),
},
{
"incompatible_array_object_mixed",
[][]byte{
[]byte(`{ "incompatible_field" : [ { "one" : "1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1" }, { "one" : 1 } ] }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "one"),
},
{
"incompatible_array_object",
[][]byte{
[]byte(`{ "incompatible_field" : [ { "one" : "1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1" } ] }`),
[]byte(`{ "incompatible_field" : [ { "one" : 1 } ] }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "one"),
},
{
"incompatible_object",
[][]byte{
[]byte(`{ "incompatible_field" : { "one" : 1 } }`),
[]byte(`{ "incompatible_field" : { "one" : "1ed6ff32-4c0f-4553-9cd3-a2ea3d58e9d1" } }`),
},
ErrIncompatibleSchema,
fmt.Errorf("%w field: %s", ErrIncompatibleSchema, "one"),
},
}

Expand Down
Loading

0 comments on commit 4bd8491

Please sign in to comment.