Skip to content

Commit 8909d9e

Browse files
authored
Merge pull request #13 from mortenson/graph-perf
Move off of graph library for performance reasons, fix dependency issues with export
2 parents 0b2c1cf + fb1c1c7 commit 8909d9e

File tree

6 files changed

+56
-27
lines changed

6 files changed

+56
-27
lines changed

db.go

+45-23
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,15 @@ import (
44
"context"
55
"crypto/sha256"
66
"encoding/binary"
7-
"errors"
87
"fmt"
98
"log/slog"
109
"math/rand"
1110
"regexp"
11+
"slices"
1212
"strings"
1313
"time"
1414

1515
"github.com/brianvoe/gofakeit/v7"
16-
"github.com/dominikbraun/graph"
1716
"github.com/google/uuid"
1817
"github.com/jackc/pgx/v5"
1918
"github.com/lib/pq"
@@ -154,7 +153,7 @@ func prepareValue(rawValue string) (string, error) {
154153
return fakerResult, nil
155154
}
156155

157-
func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, dependencyGraph graph.Graph[string, string]) (string, error) {
156+
func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, dependencyGraph map[string][]string) (string, error) {
158157
parts := strings.Split(rowId, ":")
159158
if len(parts) < 2 {
160159
return "", fmt.Errorf("invalid id: %s", rowId)
@@ -203,12 +202,8 @@ func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, depe
203202
default:
204203
return "", fmt.Errorf("cannot parse ~dependencies value in row %s", rowId)
205204
}
206-
for _, dependency := range dependencies {
207-
err := dependencyGraph.AddEdge(rowId, dependency)
208-
if isRealGraphError(err) {
209-
return "", err
210-
}
211-
}
205+
dependencyGraph[rowId] = append(dependencyGraph[rowId], dependencies...)
206+
dependencyGraph[rowId] = slices.Compact(dependencyGraph[rowId])
212207
continue
213208
}
214209

@@ -225,10 +220,8 @@ func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, depe
225220
addEdge := referenceRegex.MatchString(value)
226221
// Don't add edges to and from the same row.
227222
if addEdge && rowId != value {
228-
err := dependencyGraph.AddEdge(rowId, value)
229-
if isRealGraphError(err) {
230-
return "", err
231-
}
223+
dependencyGraph[rowId] = append(dependencyGraph[rowId], value)
224+
dependencyGraph[rowId] = slices.Compact(dependencyGraph[rowId])
232225
}
233226

234227
columns = append(columns, pq.QuoteIdentifier(column))
@@ -265,15 +258,12 @@ func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, depe
265258

266259
// Returns a sorted array of queries to run based on a given ripoff file.
267260
func buildQueriesForRipoff(primaryKeys PrimaryKeysResult, totalRipoff RipoffFile) ([]string, error) {
268-
dependencyGraph := graph.New(graph.StringHash, graph.Directed(), graph.Acyclic())
261+
dependencyGraph := map[string][]string{}
269262
queries := map[string]string{}
270263

271264
// Add vertexes first, since rows can be in any order.
272265
for rowId := range totalRipoff.Rows {
273-
err := dependencyGraph.AddVertex(rowId)
274-
if err != nil {
275-
return []string{}, err
276-
}
266+
dependencyGraph[rowId] = []string{}
277267
}
278268

279269
// Build queries.
@@ -286,7 +276,10 @@ func buildQueriesForRipoff(primaryKeys PrimaryKeysResult, totalRipoff RipoffFile
286276
}
287277

288278
// Sort and reverse the graph, so queries are in order of least (hopefully none) to most dependencies.
289-
ordered, _ := graph.TopologicalSort(dependencyGraph)
279+
ordered, err := topologicalSort(dependencyGraph)
280+
if err != nil {
281+
return []string{}, err
282+
}
290283
sortedQueries := []string{}
291284
for i := len(ordered) - 1; i >= 0; i-- {
292285
query, ok := queries[ordered[i]]
@@ -392,9 +385,38 @@ func getForeignKeysResult(ctx context.Context, conn pgx.Tx) (ForeignKeysResult,
392385
return result, nil
393386
}
394387

395-
func isRealGraphError(err error) bool {
396-
if err == nil || errors.Is(err, graph.ErrEdgeAlreadyExists) {
397-
return false
388+
// topologicalSort returns the nodes of digraph in depth-first topological
// order: for every edge u -> v (that is, digraph[u] contains v), u appears
// before v in the result. Nodes that only ever appear as edge targets are
// included too.
//
// Adapted from the DFS topological sort in github.com/amwolff/gorder, with one
// deliberate difference: cycles are tolerated (for better or worse). An edge
// that points back at a node still being visited is logged at debug level and
// skipped, so every node is still emitted exactly once, but the ordering
// guarantee cannot hold for the edges that make up the cycle.
//
// Root nodes are visited in sorted key order, so the result is deterministic
// for a given graph regardless of Go's randomized map iteration. The returned
// error is always nil; it is kept in the signature so callers do not need to
// change if cycle detection ever becomes fatal.
func topologicalSort(digraph map[string][]string) ([]string, error) {
	const (
		inProgress = iota + 1 // on the current DFS stack (temporary mark)
		done                  // fully processed (permanent mark)
	)

	state := make(map[string]int, len(digraph))
	postOrder := make([]string, 0, len(digraph))

	var visit func(u string)
	visit = func(u string) {
		switch state[u] {
		case done:
			return
		case inProgress:
			// Back edge: u is already on the DFS stack, so the graph has a cycle.
			// Log it once, at the point of detection, and carry on.
			slog.Debug("Ripoff file appears to have cycle", slog.String("rowId", u))
			return
		}

		state[u] = inProgress
		for _, v := range digraph[u] {
			visit(v)
		}
		state[u] = done
		// Append now and reverse once at the end; prepending here would copy
		// the whole slice for every node.
		postOrder = append(postOrder, u)
	}

	roots := make([]string, 0, len(digraph))
	for u := range digraph {
		roots = append(roots, u)
	}
	slices.Sort(roots)
	for _, u := range roots {
		visit(u)
	}

	// Reverse post-order of a DFS is a topological order.
	slices.Reverse(postOrder)
	return postOrder, nil
}

export.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,20 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx) (RipoffFile, error) {
101101
if slices.Contains(primaryKeys, field.Name) {
102102
ids = append(ids, columnVal)
103103
}
104+
foreignKey, isFkey := singleColumnFkeyMap[[2]string{table, field.Name}]
104105
// No need to export primary keys due to inference from schema on import.
105106
if len(primaryKeys) == 1 && primaryKeys[0] == field.Name {
107+
// The primary key is a foreign key, we'll need explicit dependencies.
108+
if isFkey && columnVal != "" {
109+
dependencies, ok := ripoffRow["~dependencies"].([]string)
110+
if !ok {
111+
ripoffRow["~dependencies"] = []string{}
112+
}
113+
ripoffRow["~dependencies"] = append(dependencies, fmt.Sprintf("%s:literal(%s)", foreignKey.ToTable, columnVal))
114+
}
106115
continue
107116
}
108117
// If this is a foreign key, should ensure it uses the table:valueFunc() format.
109-
foreignKey, isFkey := singleColumnFkeyMap[[2]string{table, field.Name}]
110118
if isFkey && columnVal != "" {
111119
// Does the referenced table have more than one primary key, or does the constraint not point to a primary key?
112120
// If so, this is a foreign key to a non-primary key, and we need to fill this info in later.

go.mod

-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ toolchain go1.22.4
66

77
require (
88
github.com/brianvoe/gofakeit/v7 v7.0.4
9-
github.com/dominikbraun/graph v0.23.0
109
github.com/google/uuid v1.6.0
1110
github.com/jackc/pgx/v5 v5.6.0
1211
github.com/lib/pq v1.10.9

go.sum

-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
44
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
55
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
66
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
7-
github.com/dominikbraun/graph v0.23.0 h1:TdZB4pPqCLFxYhdyMFb1TBdFxp8XLcJfTTBQucVPgCo=
8-
github.com/dominikbraun/graph v0.23.0/go.mod h1:yOjYyogZLY1LSG9E33JWZJiq5k83Qy2C6POAuiViluc=
97
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
108
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
119
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=

testdata/import/dependencies/dependencies.yml

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ rows:
66
url: image.png
77
avatar_modifiers:uuid(fooBarAvatar):
88
grayscale: true
9+
~dependencies: [avatars:uuid(fooBarAvatar)]

testdata/import/templates/template_user.yml

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ rows:
66
url: {{ .avatarUrl }}
77
avatar_modifiers:uuid({{ .rowId }}):
88
grayscale: {{ .avatarGrayscale }}
9+
~dependencies: [avatars:uuid({{ .rowId }})]

0 commit comments

Comments
 (0)