From 098a2206263b9967145b5eaf11f07fda63d43995 Mon Sep 17 00:00:00 2001 From: fschade Date: Wed, 30 Jul 2025 16:53:50 +0200 Subject: [PATCH] enhancement(search): implement kql to os dsl structure compilation and add basic tests --- services/search/pkg/opensearch/engine.go | 9 +- .../search/pkg/opensearch/internal/test/os.go | 5 +- .../internal/test/testdata/resource_full.json | 3 +- services/search/pkg/opensearch/kql.go | 92 ++++++---- services/search/pkg/opensearch/kql_test.go | 166 +++++++++++++++++- 5 files changed, 230 insertions(+), 45 deletions(-) diff --git a/services/search/pkg/opensearch/engine.go b/services/search/pkg/opensearch/engine.go index b8acef982d..584d8ddff3 100644 --- a/services/search/pkg/opensearch/engine.go +++ b/services/search/pkg/opensearch/engine.go @@ -29,12 +29,17 @@ func (e *Engine) Search(ctx context.Context, sir *searchService.SearchIndexReque return nil, fmt.Errorf("failed to build query: %w", err) } - query, err := KQL{}.Compile(ast) + compiler, err := NewKQL() + if err != nil { + return nil, fmt.Errorf("failed to create KQL compiler: %w", err) + } + + query, err := compiler.Compile(ast) if err != nil { return nil, fmt.Errorf("failed to compile query: %w", err) } - body, err := query.MarshalJSON() + body, err := NewRootQuery(query).MarshalJSON() if err != nil { return nil, fmt.Errorf("failed to marshal query: %w", err) } diff --git a/services/search/pkg/opensearch/internal/test/os.go b/services/search/pkg/opensearch/internal/test/os.go index 126dae1ea3..1f10741e8f 100644 --- a/services/search/pkg/opensearch/internal/test/os.go +++ b/services/search/pkg/opensearch/internal/test/os.go @@ -84,7 +84,8 @@ func (tc *TestClient) IndicesRefresh(ctx context.Context, indices []string, allo Indices: indices, }) - if err != nil && !(resp != nil && slices.Contains(allow, resp.Inspect().Response.StatusCode)) { + isAllowed := resp != nil && slices.Contains(allow, resp.Inspect().Response.StatusCode) + if err != nil && !isAllowed { return fmt.Errorf("failed to refresh indices %v: %w", indices, err) } @@ -102,7 +103,7 @@ func (tc *TestClient) IndicesDelete(ctx context.Context, indices []string) error switch { case err != nil: return fmt.Errorf("failed to delete indices: %w", err) - case resp.Acknowledged != true: + case !resp.Acknowledged: return errors.New("indices deletion not acknowledged") default: return nil diff --git a/services/search/pkg/opensearch/internal/test/testdata/resource_full.json b/services/search/pkg/opensearch/internal/test/testdata/resource_full.json index a5f3ea8611..d19798fa24 100644 --- a/services/search/pkg/opensearch/internal/test/testdata/resource_full.json +++ b/services/search/pkg/opensearch/internal/test/testdata/resource_full.json @@ -1,6 +1,7 @@ { + "ID" : "1$2!3", "Title" : "dumme title", - "Name" : "dummy name", + "Name" : "dummy", "Content" : "dummy content", "Size" : 42, "Mtime" : "2025-07-24 15:15:01.324093 +0200 CEST m=+0.000056251", diff --git a/services/search/pkg/opensearch/kql.go b/services/search/pkg/opensearch/kql.go index 8b1adc46ba..acd9a0ff90 100644 --- a/services/search/pkg/opensearch/kql.go +++ b/services/search/pkg/opensearch/kql.go @@ -1,51 +1,33 @@ package opensearch import ( - "errors" + "fmt" "strings" "github.com/opencloud-eu/opencloud/pkg/ast" + "github.com/opencloud-eu/opencloud/pkg/kql" ) type KQL struct{} -func (k KQL) Compile(a *ast.Ast) (*RootQuery, error) { - switch { - case len(a.Nodes) == 0: - return nil, errors.New("no nodes in AST") - case len(a.Nodes) == 1: - builder, err := k.getBuilder(a.Nodes[0]) - if err != nil { - return nil, err - } - return NewRootQuery(builder), nil - } - - return nil, nil +func NewKQL() (*KQL, error) { + return &KQL{}, nil } -func (k KQL) getBuilder(someNode ast.Node) (Builder, error) { - var query Builder - switch node := someNode.(type) { - case *ast.StringNode: - field := k.mapField(node.Key) - switch spaces := strings.Split(node.Value, " "); { - case len(spaces) == 1: - query = NewTermQuery[string](field).Value(node.Value) - case len(spaces) > 1: - query = NewMatchPhraseQuery(field).Query(node.Value) - } +func (k *KQL) Compile(tree *ast.Ast) (Builder, error) { + q, err := k.compile(tree.Nodes) + if err != nil { + return nil, err } - - return query, nil + return q, nil } -func (k KQL) mapField(field string) string { - if field == "" { +func (k *KQL) getFieldName(name string) string { + if name == "" { return "Name" } - mappings := map[string]string{ + var _fields = map[string]string{ "rootid": "RootID", "path": "Path", "id": "ID", @@ -60,9 +42,53 @@ func (k KQL) mapField(field string) string { "hidden": "Hidden", } - if mapped, ok := mappings[strings.ToLower(field)]; ok { - return mapped + switch n, ok := _fields[strings.ToLower(name)]; { + case ok: + return n + default: + return name + } +} + +func (k *KQL) getOperatorValueAt(nodes []ast.Node, i int) string { + if i < 0 || i >= len(nodes) { + return "" } - return field + if opn, ok := nodes[i].(*ast.OperatorNode); ok { + return opn.Value + } + + return "" +} + +func (k *KQL) compile(nodes []ast.Node) (Builder, error) { + boolQuery := NewBoolQuery() + + add := boolQuery.Must + for i, node := range nodes { + + prevOp := k.getOperatorValueAt(nodes, i-1) + nextOp := k.getOperatorValueAt(nodes, i+1) + + switch { + case nextOp == kql.BoolOR || prevOp == kql.BoolOR: + add = boolQuery.Should + case nextOp == kql.BoolAND || prevOp == kql.BoolAND: + add = boolQuery.Must + } + + switch node := node.(type) { + case *ast.StringNode: + add(NewTermQuery[string](k.getFieldName(node.Key)).Value(node.Value)) + case *ast.GroupNode: + group, err := k.compile(node.Nodes) + if err != nil { + return nil, fmt.Errorf("failed to build group: %w", err) + } + add(group) + } + } + + return boolQuery, nil } diff --git a/services/search/pkg/opensearch/kql_test.go b/services/search/pkg/opensearch/kql_test.go index 303f7e849f..874dba765b 100644 --- a/services/search/pkg/opensearch/kql_test.go +++ b/services/search/pkg/opensearch/kql_test.go @@ -11,29 +11,181 @@ import ( func TestKQL_Compile(t *testing.T) { tests := []tableTest[*ast.Ast, opensearch.Builder]{ + // field name tests { - name: "federated", + name: "Name is the default field", got: &ast.Ast{ Nodes: []ast.Node{ - &ast.StringNode{Value: "federated"}, + &ast.StringNode{Value: "moby di*"}, }, }, - want: opensearch.NewRootQuery(opensearch.NewTermQuery[string]("Name").Value("federated")), + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("Name").Value("moby di*"), + ), }, { - name: "John Smith", + name: "remaps known field names", got: &ast.Ast{ Nodes: []ast.Node{ - &ast.StringNode{Value: "John Smith"}, + &ast.StringNode{Key: "mediatype", Value: "application/gzip"}, }, }, - want: opensearch.NewRootQuery(opensearch.NewMatchPhraseQuery("Name").Query("John Smith")), + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("MimeType").Value("application/gzip"), + ), + }, + // kql to os dsl - type tests + // kql to os dsl - structure tests + { + name: "[*]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "name", Value: "moby di*"}, + }, + }, + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("Name").Value("moby di*"), + ), + }, + { + name: "[* *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "name", Value: "moby di*"}, + &ast.StringNode{Key: "age", Value: "32"}, + }, + }, + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("Name").Value("moby di*"), + opensearch.NewTermQuery[string]("age").Value("32"), + ), + }, + { + name: "[* AND *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "name", Value: "moby di*"}, + &ast.OperatorNode{Value: "AND"}, + &ast.StringNode{Key: "age", Value: "32"}, + }, + }, + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("Name").Value("moby di*"), + opensearch.NewTermQuery[string]("age").Value("32"), + ), + }, + { + name: "[* OR *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "name", Value: "moby di*"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "age", Value: "32"}, + }, + }, + want: opensearch.NewBoolQuery(). + Should( + opensearch.NewTermQuery[string]("Name").Value("moby di*"), + opensearch.NewTermQuery[string]("age").Value("32"), + ), + }, + { + name: "[* OR * OR *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "name", Value: "moby di*"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "age", Value: "32"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "age", Value: "44"}, + }, + }, + want: opensearch.NewBoolQuery(). + Should( + opensearch.NewTermQuery[string]("Name").Value("moby di*"), + opensearch.NewTermQuery[string]("age").Value("32"), + opensearch.NewTermQuery[string]("age").Value("44"), + ), + }, + { + name: "[* AND * OR *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "a", Value: "a"}, + &ast.OperatorNode{Value: "AND"}, + &ast.StringNode{Key: "b", Value: "b"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "c", Value: "c"}, + }, + }, + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("a").Value("a"), + ). + Should( + opensearch.NewTermQuery[string]("b").Value("b"), + opensearch.NewTermQuery[string]("c").Value("c"), + ), + }, + { + name: "[* OR * AND *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.StringNode{Key: "a", Value: "a"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "b", Value: "b"}, + &ast.OperatorNode{Value: "AND"}, + &ast.StringNode{Key: "c", Value: "c"}, + }, + }, + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewTermQuery[string]("c").Value("c"), + ). + Should( + opensearch.NewTermQuery[string]("a").Value("a"), + opensearch.NewTermQuery[string]("b").Value("b"), + ), + }, + { + name: "[[* OR * OR *] AND *]", + got: &ast.Ast{ + Nodes: []ast.Node{ + &ast.GroupNode{Nodes: []ast.Node{ + &ast.StringNode{Key: "a", Value: "a"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "b", Value: "b"}, + &ast.OperatorNode{Value: "OR"}, + &ast.StringNode{Key: "c", Value: "c"}, + }}, + &ast.OperatorNode{Value: "AND"}, + &ast.StringNode{Key: "d", Value: "d"}, + }, + }, + want: opensearch.NewBoolQuery(). + Must( + opensearch.NewBoolQuery(). + Should( + opensearch.NewTermQuery[string]("a").Value("a"), + opensearch.NewTermQuery[string]("b").Value("b"), + opensearch.NewTermQuery[string]("c").Value("c"), + ), + opensearch.NewTermQuery[string]("d").Value("d"), + ), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got, err := opensearch.KQL{}.Compile(test.got) + compiler, err := opensearch.NewKQL() + assert.NoError(t, err) + + got, err := compiler.Compile(test.got) assert.NoError(t, err) gotJSON, err := got.MarshalJSON()