From c72e212c91892bc6a45a71eba6572ee9f6603c7b Mon Sep 17 00:00:00 2001 From: "A.Unger" Date: Wed, 28 Oct 2020 11:59:08 +0100 Subject: [PATCH 1/3] first implementation of our own query tree and tree resolution added godata dep use correct godata dependency, enhance tests refactor around indexer search tree tests reflecting currently used queries fix static checker fix linter replace old regexp with a calll to indexer.Query fix linter remove offenses --- accounts/go.mod | 1 + accounts/go.sum | 2 + accounts/pkg/indexer/helper.go | 15 +++ accounts/pkg/indexer/indexer.go | 150 +++++++++++++++++++++++++++ accounts/pkg/indexer/indexer_test.go | 57 +++++++--- accounts/pkg/indexer/query_tree.go | 40 +++++++ accounts/pkg/indexer/test/data.go | 10 ++ accounts/pkg/service/v0/accounts.go | 90 +--------------- ocis/go.sum | 4 + 9 files changed, 268 insertions(+), 101 deletions(-) create mode 100644 accounts/pkg/indexer/helper.go create mode 100644 accounts/pkg/indexer/query_tree.go diff --git a/accounts/go.mod b/accounts/go.mod index a13239f3cd..25e1d64a9a 100644 --- a/accounts/go.mod +++ b/accounts/go.mod @@ -3,6 +3,7 @@ module github.com/owncloud/ocis/accounts go 1.13 require ( + github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 github.com/cs3org/go-cs3apis v0.0.0-20200810113633-b00aca449666 github.com/cs3org/reva v1.2.2-0.20200924071957-e6676516e61e github.com/go-chi/chi v4.1.2+incompatible diff --git a/accounts/go.sum b/accounts/go.sum index 07f8c42a24..98d42798a8 100644 --- a/accounts/go.sum +++ b/accounts/go.sum @@ -50,6 +50,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/Masterminds/goutils v1.1.0 h1:zukEsf/1JZwCMgHiK3GZftabmxiCw4apj3a28RPBiVg= github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= diff --git a/accounts/pkg/indexer/helper.go b/accounts/pkg/indexer/helper.go new file mode 100644 index 0000000000..8591beb1d7 --- /dev/null +++ b/accounts/pkg/indexer/helper.go @@ -0,0 +1,15 @@ +package indexer + +// dedup removes duplicate values in given slice +func dedup(s []string) []string { + for i := 0; i < len(s); i++ { + for i2 := i + 1; i2 < len(s); i2++ { + if s[i] == s[i2] { + // delete + s = append(s[:i2], s[i2+1:]...) + i2-- + } + } + } + return s +} diff --git a/accounts/pkg/indexer/indexer.go b/accounts/pkg/indexer/indexer.go index 2c23ffe20e..33207bc4d7 100644 --- a/accounts/pkg/indexer/indexer.go +++ b/accounts/pkg/indexer/indexer.go @@ -4,7 +4,9 @@ package indexer import ( "fmt" "path" + "strings" + "github.com/CiscoM31/godata" "github.com/iancoleman/strcase" "github.com/owncloud/ocis/accounts/pkg/config" "github.com/owncloud/ocis/accounts/pkg/indexer/errors" @@ -236,3 +238,151 @@ func (i Indexer) Update(from, to interface{}) error { return nil } + +// Query parses an OData query into something our indexer.Index understands and resolves it. +func (i Indexer) Query(t interface{}, q string) ([]string, error) { + query, err := godata.ParseFilterString(q) + if err != nil { + return nil, err + } + + tree := newQueryTree() + if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil { + return nil, err + } + + results := make([]string, 0) + if err := i.resolveTree(t, &tree, &results); err != nil { + return nil, err + } + + return results, nil +} + +// t is used to infer the indexed field names. When building an index search query, field names have to respect Golang +// conventions and be in PascalCase. For a better overview on this contemplate reading the reflection package under the +// indexer directory. Traversal of the tree happens in a pre-order fashion. +// TODO implement logic for `and` operators. +func (i Indexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error { + if partials == nil { + return fmt.Errorf("return value cannot be nil: partials") + } + + if tree.left != nil { + _ = i.resolveTree(t, tree.left, partials) + } + + if tree.right != nil { + _ = i.resolveTree(t, tree.right, partials) + } + + // by the time we're here we reached a leaf node. + if tree.token != nil { + switch tree.token.filterType { + case "FindBy": + operand, err := sanitizeInput(tree.token.operands) + if err != nil { + return err + } + + r, err := i.FindBy(t, operand.field, operand.value) + if err != nil { + return err + } + + *partials = append(*partials, r...) + case "FindByPartial": + operand, err := sanitizeInput(tree.token.operands) + if err != nil { + return err + } + + r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value)) + if err != nil { + return err + } + + *partials = append(*partials, r...) + default: + return fmt.Errorf("unsupported filter: %v", tree.token.filterType) + } + } + + *partials = dedup(*partials) + return nil +} + +type indexerTuple struct { + field, value string +} + +// sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters. +func sanitizeInput(operands []string) (*indexerTuple, error) { + if len(operands) != 2 { + return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands)) + } + + // field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules. + // for further information on this have a look at the reflection package. + f := strcase.ToCamel(operands[0]) + + // remove single quotes from value. + v := strings.ReplaceAll(operands[1], "'", "") + return &indexerTuple{ + field: f, + value: v, + }, nil +} + +// buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree +// is to transform godata operators and functions into supported operations on our index. At the time of this writing +// we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es). +func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error { + if root.Token.Type == godata.FilterTokenFunc { // i.e "startswith", "contains" + switch root.Token.Value { + case "startswith": + token := token{ + operator: root.Token.Value, + filterType: "FindByPartial", + // TODO sanitize the number of operands it the expected one. + operands: []string{ + root.Children[0].Token.Value, // field name, i.e: Name + root.Children[1].Token.Value, // field value, i.e: Jac + }, + } + + tree.insert(&token) + default: + return fmt.Errorf("operation not supported") + } + } + + if root.Token.Type == godata.FilterTokenLogical { + switch root.Token.Value { + case "or": + tree.insert(&token{operator: root.Token.Value}) + for _, child := range root.Children { + if err := buildTreeFromOdataQuery(child, tree.left); err != nil { + return err + } + } + case "eq": + tree.insert(&token{ + operator: root.Token.Value, + filterType: "FindBy", + operands: []string{ + root.Children[0].Token.Value, + root.Children[1].Token.Value, + }, + }) + for _, child := range root.Children { + if err := buildTreeFromOdataQuery(child, tree.left); err != nil { + return err + } + } + default: + return fmt.Errorf("operator not supported") + } + } + return nil +} diff --git a/accounts/pkg/indexer/indexer_test.go b/accounts/pkg/indexer/indexer_test.go index 942adddf23..54d59ecf82 100644 --- a/accounts/pkg/indexer/indexer_test.go +++ b/accounts/pkg/indexer/indexer_test.go @@ -251,18 +251,51 @@ func TestIndexer_Disk_UpdateWithNonUniqueIndex(t *testing.T) { _ = os.RemoveAll(dataDir) } -//func createCs3Indexer() *Indexer { -// return CreateIndexer(&config.Config{ -// Repo: config.Repo{ -// CS3: config.CS3{ -// ProviderAddr: "0.0.0.0:9215", -// DataURL: "http://localhost:9216", -// DataPrefix: "data", -// JWTSecret: "Pive-Fumkiu4", -// }, -// }, -// }) -//} +func TestQueryDiskImpl(t *testing.T) { + dataDir, err := WriteIndexTestData(Data, "ID", "") + assert.NoError(t, err) + indexer := createDiskIndexer(dataDir) + + err = indexer.AddIndex(&Account{}, "OnPremisesSamAccountName", "ID", "accounts", "non_unique", nil, false) + assert.NoError(t, err) + + err = indexer.AddIndex(&Account{}, "Mail", "ID", "accounts", "non_unique", nil, false) + assert.NoError(t, err) + + err = indexer.AddIndex(&Account{}, "ID", "ID", "accounts", "non_unique", nil, false) + assert.NoError(t, err) + + acc := Account{ + ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2", + Mail: "spooky@skeletons.org", + OnPremisesSamAccountName: "MrDootDoot", + } + + _, err = indexer.Add(acc) + assert.NoError(t, err) + + r, err := indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "mail eq 'spooky@skeletons.org'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot' or mail eq 'spooky@skeletons.org'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "startswith(on_premises_sam_account_name,'MrDoo')") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "id eq 'ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2' or on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + _ = os.RemoveAll(dataDir) +} func createDiskIndexer(dataDir string) *Indexer { return CreateIndexer(&config.Config{ diff --git a/accounts/pkg/indexer/query_tree.go b/accounts/pkg/indexer/query_tree.go new file mode 100644 index 0000000000..1177cc0853 --- /dev/null +++ b/accounts/pkg/indexer/query_tree.go @@ -0,0 +1,40 @@ +package indexer + +type queryTree struct { + token *token + root bool + left *queryTree + right *queryTree +} + +// token to be resolved by the index +type token struct { + operator string // original OData operator. i.e: 'startswith', `or`, `and`. + filterType string // equivalent operator from OData -> indexer i.e FindByPartial or FindBy. + operands []string +} + +// newQueryTree constructs a new tree with a root node. +func newQueryTree() queryTree { + return queryTree{ + root: true, + } +} + +// insert populates first the LHS of the tree first, if this is not possible it fills the RHS. +func (t *queryTree) insert(tkn *token) { + if t != nil && t.root { + t.left = &queryTree{token: tkn} + return + } + + if t.left == nil { + t.left = &queryTree{token: tkn} + return + } + + if t.left != nil && t.right == nil { + t.right = &queryTree{token: tkn} + return + } +} diff --git a/accounts/pkg/indexer/test/data.go b/accounts/pkg/indexer/test/data.go index 98b2916a36..d36552f651 100644 --- a/accounts/pkg/indexer/test/data.go +++ b/accounts/pkg/indexer/test/data.go @@ -19,6 +19,13 @@ type Pet struct { UID int } +// Account mocks an ocis account. +type Account struct { + ID string + OnPremisesSamAccountName string + Mail string +} + // Data mock data. var Data = map[string][]interface{}{ "users": { @@ -33,6 +40,9 @@ var Data = map[string][]interface{}{ Pet{ID: "goefe-789", Kind: "Hog", Color: "Green", Name: "Dicky"}, Pet{ID: "xadaf-189", Kind: "Hog", Color: "Green", Name: "Ricky"}, }, + "accounts": { + Account{ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2", Mail: "spooky@skeletons.org", OnPremisesSamAccountName: "MrDootDoot"}, + }, } // WriteIndexTestData writes mock data to disk. diff --git a/accounts/pkg/service/v0/accounts.go b/accounts/pkg/service/v0/accounts.go index 45021bbe8e..c26d861102 100644 --- a/accounts/pkg/service/v0/accounts.go +++ b/accounts/pkg/service/v0/accounts.go @@ -197,83 +197,7 @@ func (s Service) ListAccounts(ctx context.Context, in *proto.ListAccountsRequest } func (s Service) findAccountsByQuery(ctx context.Context, query string) ([]string, error) { - var searchResults []string - var err error - - // TODO: more explicit queries have to be on top - var onPremOrMailQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)' or mail eq '(.*)'$`) - match := onPremOrMailQuery.FindStringSubmatch(query) - if len(match) == 3 { - resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*") - if err != nil { - return nil, err - } - resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[2]+"*") - if err != nil { - return nil, err - } - - searchResults = append(resSam, resMail...) - return unique(searchResults), nil - } - - // startswith(on_premises_sam_account_name,'mar') or startswith(display_name,'mar') or startswith(mail,'mar') - var searchQuery = regexp.MustCompile(`^startswith\(on_premises_sam_account_name,'(.*)'\) or startswith\(display_name,'(.*)'\) or startswith\(mail,'(.*)'\)$`) - match = searchQuery.FindStringSubmatch(query) - if len(match) == 4 { - resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*") - if err != nil { - return nil, err - } - resDisp, err := s.index.FindByPartial(&proto.Account{}, "DisplayName", match[2]+"*") - if err != nil { - return nil, err - } - resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[3]+"*") - if err != nil { - return nil, err - } - - searchResults = append(resSam, append(resDisp, resMail...)...) - return unique(searchResults), nil - } - - // id eq 'marie' or on_premises_sam_account_name eq 'marie' - // id eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c' or on_premises_sam_account_name eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c' - var idOrQuery = regexp.MustCompile(`^id eq '(.*)' or on_premises_sam_account_name eq '(.*)'$`) - match = idOrQuery.FindStringSubmatch(query) - if len(match) == 3 { - qID, qSam := match[1], match[2] - tmp := &proto.Account{} - err = s.repo.LoadAccount(ctx, qID, tmp) - if err != nil && !storage.IsNotFoundErr(err) { - return nil, err - } - searchResults, err = s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", qSam) - if err != nil { - return nil, err - } - - if tmp.Id != "" { - searchResults = append(searchResults, tmp.Id) - } - - return unique(searchResults), nil - } - - var onPremQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)'$`) // TODO how is ' escaped in the password? - match = onPremQuery.FindStringSubmatch(query) - if len(match) == 2 { - return s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", match[1]) - } - - var mailQuery = regexp.MustCompile(`^mail eq '(.*)'$`) - match = mailQuery.FindStringSubmatch(query) - if len(match) == 2 { - return s.index.FindBy(&proto.Account{}, "Mail", match[1]) - } - - return nil, merrors.BadRequest(s.id, "unsupported query %s", query) + return s.index.Query(&proto.Account{}, query) } // GetAccount implements the AccountsServiceHandler interface @@ -762,18 +686,6 @@ func (s Service) debugLogAccount(a *proto.Account) *zerolog.Event { }) } -func unique(strSlice []string) []string { - keys := make(map[string]bool) - list := []string{} - for _, entry := range strSlice { - if _, value := keys[entry]; !value { - keys[entry] = true - list = append(list, entry) - } - } - return list -} - func (s Service) accountExists(ctx context.Context, username, mail, id string) (exists bool, err error) { var ids []string ids, err = s.index.FindBy(&proto.Account{}, "preferred_name", username) diff --git a/ocis/go.sum b/ocis/go.sum index e2d77c9646..eeb1656e84 100644 --- a/ocis/go.sum +++ b/ocis/go.sum @@ -55,6 +55,10 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc h1:W4GbdgLvIYP02W1CFpqQbMC6fxEfKHYNlmVIt3QmByk= +github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e h1:Bqtt5C+uVk+vH/t5dmB47uDCTwxw16EYHqvJnmY2aQc= github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e/go.mod h1:njRCDrl+1RQ/A/+KVU8Ho2EWAxUSkohOWczdW3dzDG0= From 636caea2172c8d4c63efa42deb62015b0f7b7087 Mon Sep 17 00:00:00 2001 From: Alex Unger Date: Thu, 29 Oct 2020 15:21:10 +0100 Subject: [PATCH 2/3] add index on account.Id due to a rogue query --- accounts/pkg/service/v0/index.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/accounts/pkg/service/v0/index.go b/accounts/pkg/service/v0/index.go index 3bb75e4399..a3a1202947 100644 --- a/accounts/pkg/service/v0/index.go +++ b/accounts/pkg/service/v0/index.go @@ -32,6 +32,10 @@ func (s Service) RebuildIndex(ctx context.Context, request *proto.RebuildIndexRe // recreateContainers adds all indices to the indexer that we have for this service. func recreateContainers(idx *indexer.Indexer, cfg *config.Config) error { // Accounts + if err := idx.AddIndex(&proto.Account{}, "Id", "Id", "accounts", "unique", nil, true); err != nil { + return err + } + if err := idx.AddIndex(&proto.Account{}, "DisplayName", "Id", "accounts", "non_unique", nil, true); err != nil { return err } From 6761f709595965c375b8e854f1ba6908cf763919 Mon Sep 17 00:00:00 2001 From: Alex Unger Date: Thu, 29 Oct 2020 16:09:20 +0100 Subject: [PATCH 3/3] try non unique? --- accounts/pkg/service/v0/index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts/pkg/service/v0/index.go b/accounts/pkg/service/v0/index.go index a3a1202947..33922325ae 100644 --- a/accounts/pkg/service/v0/index.go +++ b/accounts/pkg/service/v0/index.go @@ -32,7 +32,7 @@ func (s Service) RebuildIndex(ctx context.Context, request *proto.RebuildIndexRe // recreateContainers adds all indices to the indexer that we have for this service. func recreateContainers(idx *indexer.Indexer, cfg *config.Config) error { // Accounts - if err := idx.AddIndex(&proto.Account{}, "Id", "Id", "accounts", "unique", nil, true); err != nil { + if err := idx.AddIndex(&proto.Account{}, "Id", "Id", "accounts", "non_unique", nil, true); err != nil { return err }